Hi,
Thanks Stan for submitting this. I regularly encounter csv documents that do not have a set number of columns or where the column names are changed regularly. This solution is ideal for my purposes. I can then change the column names more easily using T-SQL.
Thanks 🙂
I have enhanced this script to do the following:-
Determine data types
Skip blank rows on the csv
Insert null for missing columns on any csv row
Insert 1000 rows at a time
I'm sure that there will be some bugs in this script and I will probably find them in the coming months. Please treat this as a beta version if you use it.
You must first amend the constant variables at the top of this code to use it.
' Microsoft SQL Server Integration Services Script Task
' Write scripts using Microsoft Visual Basic 2008.
' The ScriptMain is the entry point class of the script.
Imports System
Imports System.Data
Imports System.Math
Imports Microsoft.SqlServer.Dts.Runtime
Imports System.IO
<System.AddIn.AddIn("ScriptMain", Version:="1.0", Publisher:="", Description:="")> _
<System.CLSCompliantAttribute(False)> _
Partial Public Class ScriptMain
Inherits Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
Enum ScriptResults
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
End Enum
Const _SampleRows As Long = 100000
Const source_directory As String = "C:\Path to your directory"
Const SchemaName As String = "Schema For Csv Imports On Your Sql Server"
Const DBConnection As String = "Name Of ADO.Net Connection In Your Package"
Class DBType
Private _HasDate As Boolean = False
Private _HasFloat As Boolean = False
Private _HasTiny As Boolean = False
Private _HasSmall As Boolean = False
Private _HasInt As Boolean = False
Private _HasBig As Boolean = False
Private _HasNumber As Boolean = False
Private _HasString As Boolean = False
Private _MaxLen As Long = 0
Private _DBType As String
Private _Container As String
Public Property HasDate() As Boolean
Get
Return _HasDate
End Get
Set(ByVal value As Boolean)
_HasDate = value
End Set
End Property
Public Property HasFloat() As Boolean
Get
Return _HasFloat
End Get
Set(ByVal value As Boolean)
_HasFloat = value
End Set
End Property
Public Property HasTiny() As Boolean
Get
Return _HasTiny
End Get
Set(ByVal value As Boolean)
_HasTiny = value
End Set
End Property
Public Property HasSmall() As Boolean
Get
Return _HasSmall
End Get
Set(ByVal value As Boolean)
_HasSmall = value
End Set
End Property
Public Property HasInt() As Boolean
Get
Return _HasInt
End Get
Set(ByVal value As Boolean)
_HasInt = value
End Set
End Property
Public Property HasBig() As Boolean
Get
Return _HasBig
End Get
Set(ByVal value As Boolean)
_HasBig = value
End Set
End Property
Public Property HasString() As Boolean
Get
Return _HasString
End Get
Set(ByVal value As Boolean)
_HasString = value
End Set
End Property
Public Property MaxLen() As Long
Get
Return _MaxLen
End Get
Set(ByVal value As Long)
_MaxLen = value
End Set
End Property
Public ReadOnly Property DBType() As String
Get
Return _DBType
End Get
End Property
Public ReadOnly Property Container() As String
Get
Return _Container
End Get
End Property
Public Sub Prepare()
If _HasString Or (_HasDate And _HasNumber) Then
_DBType = "NVARCHAR(" & IIf(_MaxLen >= 50, "Max", _MaxLen) & ")"
_Container = "'"
ElseIf _HasDate Then
_DBType = "DATETIME"
_Container = "'"
ElseIf _HasFloat Then
_DBType = "FLOAT"
_Container = ""
ElseIf _HasBig Then
_DBType = "BIGINT"
_Container = ""
ElseIf _HasInt Then
_DBType = "INT"
_Container = ""
ElseIf _HasSmall Then
_DBType = "SMALLINT"
_Container = ""
ElseIf _HasTiny Then
_DBType = "TINYINT"
_Container = ""
Else
_DBType = "NVARCHAR(Max)"
_Container = "'"
End If
End Sub
Public Sub ComputeValue(ByVal Value As String)
If IsDate(Value) Then
_HasDate = True
ElseIf IsNumeric(Value) And Not IsDate(Value) Then
_HasNumber = True
Dim dbl As Double = CDbl(Value)
If dbl Mod 1 <> 0 Then
_HasFloat = True
ElseIf dbl >= 0 And dbl <= 255 Then
_HasTiny = True
ElseIf dbl >= (0 - 32768) And dbl <= 32767 Then
_HasSmall = True
ElseIf dbl >= (0 - 2147483648) And dbl <= (2147483648 - 1) Then
_HasInt = True
ElseIf dbl >= (0 - 9223372036854775807) And dbl <= (9223372036854775807) Then
_HasBig = True
Else
_HasFloat = True
End If
Else
_HasString = True
End If
If _MaxLen < Value.Replace("'", "''").Length Then _MaxLen = Value.Replace("'", "''").Length
End Sub
End Class
Public Sub Main()
Dim source_file As String = "*.csv"
Dim di As New System.IO.DirectoryInfo(source_directory)
Dim aryFi As IO.FileInfo() = di.GetFiles(source_file)
Dim fi As IO.FileInfo
For Each fi In aryFi
Dim sLine As String
Dim arrText As New ArrayList()
Dim objReader As New StreamReader(di.ToString & "\" & fi.ToString)
Dim DBTypes As New Collection
Do
sLine = objReader.ReadLine()
If Not sLine Is Nothing Then
arrText.Add(sLine)
End If
Loop Until sLine Is Nothing
objReader.Close()
Dim FirstLine As String = ""
Dim lngSampled As Long = 0
For Each sLine In arrText
If FirstLine = "" Then
FirstLine = sLine
ElseIf FirstLine <> "" And _SampleRows >= lngSampled Then
Dim ID As Long = 0
Dim DTyp As DBType
For Each s As String In sLine.Split(",")
If DBTypes.Contains("Col_" & ID) = False Then
DTyp = New DBType
DBTypes.Add(DTyp, "Col_" & ID)
Else
DTyp = DBTypes("Col_" & ID)
End If
DTyp.ComputeValue(s)
ID += 1
Next
lngSampled += 1
End If
Next
For Each dt As DBType In DBTypes
dt.Prepare()
Next
Dim FieldNames() As String = Split(FirstLine, ",")
Dim count As Integer = Split(FirstLine, ",").Length
Dim mySqlStatement As String = "BEGIN TRY DROP TABLE " & SchemaName & "." & fi.Name.Replace(".", "_") & " END TRY BEGIN CATCH END CATCH CREATE TABLE " & SchemaName & "." & fi.Name.Replace(".", "_") & " ("
Dim comma As String = ""
For index As Integer = 0 To count - 1
mySqlStatement += comma & "[" & FieldNames(index) & "]" & " " & CType(DBTypes("Col_" & index), DBType).DBType
comma = ","
Next
mySqlStatement += ")"
Dim myADONETConnection As SqlClient.SqlConnection = DirectCast(Dts.Connections(DBConnection).AcquireConnection(Dts.Transaction), SqlClient.SqlConnection)
Dim myCommand As New SqlClient.SqlCommand(mySqlStatement, myADONETConnection)
Try
myCommand.ExecuteNonQuery()
myADONETConnection.Close()
Catch ex As Exception
Dts.TaskResult = ScriptResults.Failure
Throw ex
Exit Sub
End Try
FirstLine = ""
Dim line_count As Integer = 1
mySqlStatement = "INSERT INTO " & SchemaName & "." & fi.Name.Replace(".", "_") & " VALUES "
Dim comma2 As String = ""
Dim c As Long = 0
For Each sLine In arrText
If Trim(sLine) <> "" Then
If FirstLine = "" Then
FirstLine = sLine
Else
line_count += 1
Dim fields() As String = Split(sLine, ",")
Dim FieldCount As Integer = Split(sLine, ",").Length
mySqlStatement &= comma2 & "("
comma = ""
For FieldIndex As Integer = 0 To FieldCount - 1
If FieldIndex <= FieldNames.Length - 1 Then
Dim dtyp As DBType
dtyp = DBTypes("Col_" & FieldIndex)
If Not dtyp.DBType Like "*varchar*" And Trim(fields(FieldIndex)) = "" Then
mySqlStatement += comma & "NULL"
Else
If dtyp.DBType = "DATETIME" Then
mySqlStatement += comma & dtyp.Container & CDate(fields(FieldIndex)).ToString("yyyy-MM-dd hh:mm:ss tt") & dtyp.Container
Else
mySqlStatement += comma & dtyp.Container & Replace(fields(FieldIndex), "'", "''") & dtyp.Container
End If
End If
comma = ","
End If
Next
If FieldCount < FieldNames.Length Then
For i = FieldCount To FieldNames.Length - 1
mySqlStatement += comma & "NULL"
Next
comma = ","
End If
mySqlStatement += ")"
c += 1
If c = 1000 Then
Try
myADONETConnection = DirectCast(Dts.Connections(DBConnection).AcquireConnection(Dts.Transaction), SqlClient.SqlConnection)
myCommand = New SqlClient.SqlCommand(mySqlStatement, myADONETConnection)
myCommand.ExecuteNonQuery()
myADONETConnection.Close()
Catch ex As Exception
Dts.TaskResult = ScriptResults.Failure
Throw ex
Exit Sub
End Try
mySqlStatement = "INSERT INTO " & SchemaName & "." & fi.Name.Replace(".", "_") & " VALUES "
comma2 = ""
c = 0
Else
comma2 = ","
End If
End If
End If
Next
If mySqlStatement <> "INSERT INTO " & SchemaName & "." & fi.Name.Replace(".", "_") & " VALUES " Then
Try
myADONETConnection = DirectCast(Dts.Connections(DBConnection).AcquireConnection(Dts.Transaction), SqlClient.SqlConnection)
myCommand = New SqlClient.SqlCommand(mySqlStatement, myADONETConnection)
myCommand.ExecuteNonQuery()
myADONETConnection.Close()
Catch ex As Exception
Dts.TaskResult = ScriptResults.Failure
End Try
End If
Dim file As New System.IO.FileInfo(di.ToString & fi.ToString)
Try
file.Delete()
Catch ex As Exception
Dts.TaskResult = ScriptResults.Failure
End Try
DBTypes = Nothing
Next
Dts.TaskResult = ScriptResults.Success
End Sub
End Class