Just chipping in a little bit, one of the bigger annoyances when importing data from Excel spreadsheets is the lack of metadata functions/view, think thousands of files with a time stamp in the worksheet name:pinch: Dealt with this in the passed by reversing the BIFF5 format, came across similar situation recently so I re-wrote from memory (as I couldn't find the original) the initial procedure and added support for the XSLX. The code is below, enjoy
😎
USE Test;
GO
GO
CREATE PROCEDURE dbo.EE_LIST_EXCEL_WORKSHEETS
(
@FILE_PATH NVARCHAR(2048) = NULL
)
AS
/*
Procedure dbo.EE_LIST_EXCEL_WORKSHEETS
Initial coding ca. 1996, Eirikur Eiriksson
Re-written 2015-03-03
Description: List visible worksheets in an Excel workbook (BIFF8/XLSX)
Dependencies: The XLSX file format is a zipped multi document XML, the procedure
needs an decompression CLR function, the one used here is a part of
the SQL# from http://www.sqlsharp.com/
USAGE
DECLARE @FILE_PATH NVARCHAR(2048) = N'C:\INCOMING_DATA\DROP_FOLDER\Sample Spreadsheet for Import.xlsx';
EXEC dbo.EE_LIST_EXCEL_WORKSHEETS @FILE_PATH;
*/
SET NOCOUNT ON;
/*
Very simple input validation
*/
IF @FILE_PATH IS NULL OR LEN(@FILE_PATH) < 6 RETURN;
/*
A table variable to store the binary file data
*/
DECLARE @BINBLOB TABLE
(
BINDATA VARBINARY(MAX) NOT NULL
,IS_XML_ZIP AS (SIGN(CHARINDEX(0x504B030414,BINDATA,1)))
,IS_XSL AS (SIGN(CHARINDEX(0xD0CF11E0A1B11AE1,BINDATA,1)))
);
DECLARE @SQL_STR NVARCHAR(MAX) = N'SELECT x.BulkColumn FROM OPENROWSET(
BULK ' + NCHAR(39) + @FILE_PATH + NCHAR(39) + N',
SINGLE_BLOB) AS x;'
INSERT INTO @BINBLOB (BINDATA)
EXEC(@SQL_STR);
IF (SELECT TOP(1) IS_XML_ZIP FROM @BINBLOB) = 1
BEGIN
/*
In XSLX or compressed XML files the worksheet names are stored in a separate XML
document called xl/workbook.xml
*/
DECLARE @START_PAT VARCHAR(26) = 'xl/workbook.xml';
DECLARE @END_PAT VARCHAR(26) = 'xl/';
;WITH XMLNAMESPACES (
'http://schemas.openxmlformats.org/officeDocument/2006/relationships' AS R
,DEFAULT 'http://schemas.openxmlformats.org/spreadsheetml/2006/main')
,WORKBOOK_XML_START AS
(
SELECT
CHARINDEX(@START_PAT, BB.BINDATA,1) AS X_POS
,BB.BINDATA
FROM @BINBLOB BB
WHERE BB.IS_XML_ZIP = 1
)
,WORKBOOK_INFO AS
(
SELECT
CONVERT(XML,[SQL#].[Util_Inflate](SUBSTRING(WXS.BINDATA,WXS.X_POS
+ LEN(@START_PAT)
,CHARINDEX(@END_PAT,WXS.BINDATA,WXS.X_POS
+ LEN(@START_PAT)) - (WXS.X_POS + LEN(@START_PAT))))
,0) AS WXML
FROM WORKBOOK_XML_START WXS
)
SELECT
WXML.DATA.value('@name','NVARCHAR(255)') AS WS_NAME
FROM WORKBOOK_INFO WI
CROSS APPLY WI.WXML.nodes('workbook/sheets/sheet') AS WXML(DATA)
WHERE WXML.DATA.value('@state','NVARCHAR(255)') = N'visible';
END
ELSE IF (SELECT TOP(1) IS_XSL FROM @BINBLOB) = 1
BEGIN
/*
BIFF7/8 (Excel 95-2003) have a BOUNDSHEET:Sheet Information structure that
stores the worksheet names, the @BINPAT_START variable holds the record identifier
*/
DECLARE @BINPAT_START BINARY(2) = 0x8500;
;WITH SHEET_NAME_ARRAY AS
(
SELECT
CHARINDEX(@BINPAT_START,BB.BINDATA,1) AS X_POS
,BB.BINDATA
FROM @BINBLOB BB
WHERE BB.IS_XSL = 1
UNION ALL
SELECT
CHARINDEX(@BINPAT_START,SNA.BINDATA,SNA.X_POS + 11)
,SNA.BINDATA
FROM SHEET_NAME_ARRAY SNA
WHERE SNA.X_POS > 0
)
,BOUNDSHEET_ARRAY AS
(
SELECT
SNA.X_POS
,SNA.BINDATA
,CONVERT(INT,SUBSTRING(SNA.BINDATA,SNA.X_POS + 8,2),0) AS IS_HIDDEN
,CONVERT(INT,SUBSTRING(SNA.BINDATA,SNA.X_POS + 10,1),0) AS NAME_LEN
FROM SHEET_NAME_ARRAY SNA
WHERE SNA.X_POS > 0
)
SELECT
CONVERT(VARCHAR(256),SUBSTRING(BA.BINDATA,BA.X_POS + 12,BA.NAME_LEN),0) AS WS_NAME
FROM BOUNDSHEET_ARRAY BA
WHERE BA.IS_HIDDEN = 0;
END