• Hi,

    Thanks Stan for submitting this. I regularly encounter csv documents that do not have a set number of columns or where the column names are changed regularly. This solution is ideal for my purposes. I can then change the column names more easily using T-SQL.

    Thanks 🙂

    I have enhanced this script to do the following:-

    Determine data types

    Skip blank rows on the csv

    Insert null for missing columns on any csv row

    Insert 1000 rows at a time

    I'm sure that there will be some bugs in this script and I will probably find them in the coming months. Please treat this as a beta version if you use it.

    You must first amend the constant variables at the top of this code to use it.

    ' Microsoft SQL Server Integration Services Script Task

    ' Write scripts using Microsoft Visual Basic 2008.

    ' The ScriptMain is the entry point class of the script.

    Imports System

    Imports System.Data

    Imports System.Math

    Imports Microsoft.SqlServer.Dts.Runtime

    Imports System.IO

    <System.AddIn.AddIn("ScriptMain", Version:="1.0", Publisher:="", Description:="")> _

    <System.CLSCompliantAttribute(False)> _

    Partial Public Class ScriptMain

    Inherits Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase

    Enum ScriptResults

    Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success

    Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure

    End Enum

    Const _SampleRows As Long = 100000

    Const source_directory As String = "C:\Path to your directory"

    Const SchemaName As String = "Schema For Csv Imports On Your Sql Server"

    Const DBConnection As String = "Name Of ADO.Net Connection In Your Package"

    Class DBType

    Private _HasDate As Boolean = False

    Private _HasFloat As Boolean = False

    Private _HasTiny As Boolean = False

    Private _HasSmall As Boolean = False

    Private _HasInt As Boolean = False

    Private _HasBig As Boolean = False

    Private _HasNumber As Boolean = False

    Private _HasString As Boolean = False

    Private _MaxLen As Long = 0

    Private _DBType As String

    Private _Container As String

    Public Property HasDate() As Boolean

    Get

    Return _HasDate

    End Get

    Set(ByVal value As Boolean)

    _HasDate = value

    End Set

    End Property

    Public Property HasFloat() As Boolean

    Get

    Return _HasFloat

    End Get

    Set(ByVal value As Boolean)

    _HasFloat = value

    End Set

    End Property

    Public Property HasTiny() As Boolean

    Get

    Return _HasTiny

    End Get

    Set(ByVal value As Boolean)

    _HasTiny = value

    End Set

    End Property

    Public Property HasSmall() As Boolean

    Get

    Return _HasSmall

    End Get

    Set(ByVal value As Boolean)

    _HasSmall = value

    End Set

    End Property

    Public Property HasInt() As Boolean

    Get

    Return _HasInt

    End Get

    Set(ByVal value As Boolean)

    _HasInt = value

    End Set

    End Property

    Public Property HasBig() As Boolean

    Get

    Return _HasBig

    End Get

    Set(ByVal value As Boolean)

    _HasBig = value

    End Set

    End Property

    Public Property HasString() As Boolean

    Get

    Return _HasString

    End Get

    Set(ByVal value As Boolean)

    _HasString = value

    End Set

    End Property

    Public Property MaxLen() As Long

    Get

    Return _MaxLen

    End Get

    Set(ByVal value As Long)

    _MaxLen = value

    End Set

    End Property

    Public ReadOnly Property DBType() As String

    Get

    Return _DBType

    End Get

    End Property

    Public ReadOnly Property Container() As String

    Get

    Return _Container

    End Get

    End Property

    Public Sub Prepare()

    If _HasString Or (_HasDate And _HasNumber) Then

    _DBType = "NVARCHAR(" & IIf(_MaxLen >= 50, "Max", _MaxLen) & ")"

    _Container = "'"

    ElseIf _HasDate Then

    _DBType = "DATETIME"

    _Container = "'"

    ElseIf _HasFloat Then

    _DBType = "FLOAT"

    _Container = ""

    ElseIf _HasBig Then

    _DBType = "BIGINT"

    _Container = ""

    ElseIf _HasInt Then

    _DBType = "INT"

    _Container = ""

    ElseIf _HasSmall Then

    _DBType = "SMALLINT"

    _Container = ""

    ElseIf _HasTiny Then

    _DBType = "TINYINT"

    _Container = ""

    Else

    _DBType = "NVARCHAR(Max)"

    _Container = "'"

    End If

    End Sub

    Public Sub ComputeValue(ByVal Value As String)

    If IsDate(Value) Then

    _HasDate = True

    ElseIf IsNumeric(Value) And Not IsDate(Value) Then

    _HasNumber = True

    Dim dbl As Double = CDbl(Value)

    If dbl Mod 1 <> 0 Then

    _HasFloat = True

    ElseIf dbl >= 0 And dbl <= 255 Then

    _HasTiny = True

    ElseIf dbl >= (0 - 32768) And dbl <= 32767 Then

    _HasSmall = True

    ElseIf dbl >= (0 - 2147483648) And dbl <= (2147483648 - 1) Then

    _HasInt = True

    ElseIf dbl >= (0 - 9223372036854775807) And dbl <= (9223372036854775807) Then

    _HasBig = True

    Else

    _HasFloat = True

    End If

    Else

    _HasString = True

    End If

    If _MaxLen < Value.Replace("'", "''").Length Then _MaxLen = Value.Replace("'", "''").Length

    End Sub

    End Class

    Public Sub Main()

    Dim source_file As String = "*.csv"

    Dim di As New System.IO.DirectoryInfo(source_directory)

    Dim aryFi As IO.FileInfo() = di.GetFiles(source_file)

    Dim fi As IO.FileInfo

    For Each fi In aryFi

    Dim sLine As String

    Dim arrText As New ArrayList()

    Dim objReader As New StreamReader(di.ToString & "\" & fi.ToString)

    Dim DBTypes As New Collection

    Do

    sLine = objReader.ReadLine()

    If Not sLine Is Nothing Then

    arrText.Add(sLine)

    End If

    Loop Until sLine Is Nothing

    objReader.Close()

    Dim FirstLine As String = ""

    Dim lngSampled As Long = 0

    For Each sLine In arrText

    If FirstLine = "" Then

    FirstLine = sLine

    ElseIf FirstLine <> "" And _SampleRows >= lngSampled Then

    Dim ID As Long = 0

    Dim DTyp As DBType

    For Each s As String In sLine.Split(",")

    If DBTypes.Contains("Col_" & ID) = False Then

    DTyp = New DBType

    DBTypes.Add(DTyp, "Col_" & ID)

    Else

    DTyp = DBTypes("Col_" & ID)

    End If

    DTyp.ComputeValue(s)

    ID += 1

    Next

    lngSampled += 1

    End If

    Next

    For Each dt As DBType In DBTypes

    dt.Prepare()

    Next

    Dim FieldNames() As String = Split(FirstLine, ",")

    Dim count As Integer = Split(FirstLine, ",").Length

    Dim mySqlStatement As String = "BEGIN TRY DROP TABLE " & SchemaName & "." & fi.Name.Replace(".", "_") & " END TRY BEGIN CATCH END CATCH CREATE TABLE " & SchemaName & "." & fi.Name.Replace(".", "_") & " ("

    Dim comma As String = ""

    For index As Integer = 0 To count - 1

    mySqlStatement += comma & "[" & FieldNames(index) & "]" & " " & CType(DBTypes("Col_" & index), DBType).DBType

    comma = ","

    Next

    mySqlStatement += ")"

    Dim myADONETConnection As SqlClient.SqlConnection = DirectCast(Dts.Connections(DBConnection).AcquireConnection(Dts.Transaction), SqlClient.SqlConnection)

    Dim myCommand As New SqlClient.SqlCommand(mySqlStatement, myADONETConnection)

    Try

    myCommand.ExecuteNonQuery()

    myADONETConnection.Close()

    Catch ex As Exception

    Dts.TaskResult = ScriptResults.Failure

    Throw ex

    Exit Sub

    End Try

    FirstLine = ""

    Dim line_count As Integer = 1

    mySqlStatement = "INSERT INTO " & SchemaName & "." & fi.Name.Replace(".", "_") & " VALUES "

    Dim comma2 As String = ""

    Dim c As Long = 0

    For Each sLine In arrText

    If Trim(sLine) <> "" Then

    If FirstLine = "" Then

    FirstLine = sLine

    Else

    line_count += 1

    Dim fields() As String = Split(sLine, ",")

    Dim FieldCount As Integer = Split(sLine, ",").Length

    mySqlStatement &= comma2 & "("

    comma = ""

    For FieldIndex As Integer = 0 To FieldCount - 1

    If FieldIndex <= FieldNames.Length - 1 Then

    Dim dtyp As DBType

    dtyp = DBTypes("Col_" & FieldIndex)

    If Not dtyp.DBType Like "*varchar*" And Trim(fields(FieldIndex)) = "" Then

    mySqlStatement += comma & "NULL"

    Else

    If dtyp.DBType = "DATETIME" Then

    mySqlStatement += comma & dtyp.Container & CDate(fields(FieldIndex)).ToString("yyyy-MM-dd hh:mm:ss tt") & dtyp.Container

    Else

    mySqlStatement += comma & dtyp.Container & Replace(fields(FieldIndex), "'", "''") & dtyp.Container

    End If

    End If

    comma = ","

    End If

    Next

    If FieldCount < FieldNames.Length Then

    For i = FieldCount To FieldNames.Length - 1

    mySqlStatement += comma & "NULL"

    Next

    comma = ","

    End If

    mySqlStatement += ")"

    c += 1

    If c = 1000 Then

    Try

    myADONETConnection = DirectCast(Dts.Connections(DBConnection).AcquireConnection(Dts.Transaction), SqlClient.SqlConnection)

    myCommand = New SqlClient.SqlCommand(mySqlStatement, myADONETConnection)

    myCommand.ExecuteNonQuery()

    myADONETConnection.Close()

    Catch ex As Exception

    Dts.TaskResult = ScriptResults.Failure

    Throw ex

    Exit Sub

    End Try

    mySqlStatement = "INSERT INTO " & SchemaName & "." & fi.Name.Replace(".", "_") & " VALUES "

    comma2 = ""

    c = 0

    Else

    comma2 = ","

    End If

    End If

    End If

    Next

    If mySqlStatement <> "INSERT INTO " & SchemaName & "." & fi.Name.Replace(".", "_") & " VALUES " Then

    Try

    myADONETConnection = DirectCast(Dts.Connections(DBConnection).AcquireConnection(Dts.Transaction), SqlClient.SqlConnection)

    myCommand = New SqlClient.SqlCommand(mySqlStatement, myADONETConnection)

    myCommand.ExecuteNonQuery()

    myADONETConnection.Close()

    Catch ex As Exception

    Dts.TaskResult = ScriptResults.Failure

    End Try

    End If

    Dim file As New System.IO.FileInfo(di.ToString & fi.ToString)

    Try

    file.Delete()

    Catch ex As Exception

    Dts.TaskResult = ScriptResults.Failure

    End Try

    DBTypes = Nothing

    Next

    Dts.TaskResult = ScriptResults.Success

    End Sub

    End Class