I've had a much better idea. Create an XML format file like so:
<?xml version="1.0" ?>
<BCPFORMAT xmlns="http://schemas.microsoft.com/sqlserver/2004/bulkload/format" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<RECORD>
<FIELD ID="01" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="02" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="03" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="04" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="05" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="06" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="07" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="08" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="09" xsi:type="CharTerm" TERMINATOR="|" MAX_LENGTH="50"/>
<FIELD ID="10" xsi:type="CharTerm" TERMINATOR="\r" MAX_LENGTH="50"/>
</RECORD>
<ROW>
<COLUMN SOURCE="01" NAME="col1_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="02" NAME="col2_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="03" NAME="col3_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="04" NAME="col4_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="05" NAME="col5_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="06" NAME="col6_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="07" NAME="col7_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="08" NAME="col8_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="09" NAME="col9_dirty" xsi:type="SQLVARYCHAR" />
<COLUMN SOURCE="10" NAME="col10_dirty" xsi:type="SQLVARYCHAR" />
</ROW>
</BCPFORMAT>
And use the BULK OPENROWSET provider to allow you to pre-process the raw data before inserting into the destination table:
INSERT dbo.zz_u_test_TEMP WITH (TABLOCK)
(
col1_dirty, col2_dirty, col3_dirty, col4_dirty, col5_dirty,
col6_dirty, col7_dirty, col8_dirty, col9_dirty, col10_dirty
)
SELECT REPLACE(RowSource.col1_dirty, CHAR(34), SPACE(0)),
REPLACE(RowSource.col2_dirty, CHAR(34), SPACE(0)),
REPLACE(RowSource.col3_dirty, CHAR(34), SPACE(0)),
REPLACE(RowSource.col4_dirty, CHAR(34), SPACE(0)),
REPLACE(RowSource.col5_dirty, CHAR(34), SPACE(0)),
REPLACE(RowSource.col6_dirty, CHAR(34), SPACE(0)),
RowSource.col7_dirty,
RowSource.col8_dirty,
RowSource.col9_dirty,
RowSource.col10_dirty
FROM OPENROWSET
(
BULK 'C:\Documents and Settings\Paul\Test.data',
FORMATFILE = 'C:\Documents and Settings\Paul\format.xml',
CODEPAGE = 'RAW',
FIRSTROW = 0,
LASTROW = 0,
MAXERRORS = 0,
ROWS_PER_BATCH = 0
) RowSource;
This preserves all the benefits of minimally-logged fast bulk load, while giving you the opportunity to apply transformations in the SELECT clause. Very cool.
The data loaded using the simple example above is:
col1_dirty col2_dirty col3_dirty col4_dirty col5_dirty col6_dirty col7_dirty col8_dirty col9_dirty col10_dirty
1 7 D 2 _ NULL 05/25/2007 05/25/2007 10.00 2.15
Paul
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi