Dim re As New Regex("((?<field>[^"",\r\n]+)|""(?<field>([^""]|"""")+)"")((?<rowbreak>,\r\n|\r\n|\n|$)|,)")
3700,Collin,Franc,,[email protected],Collins,Francine,,[email protected],"Enel North America, Inc.",One T Drive,Suite 220,MyCity,MA,77774,1,[email protected],,,,,3700,
3701,Steur,Larry,,[email protected],Steur,Larry,,[email protected],"St. Petersburg Corp, Inc.",10490 Gady Blvd,,MyCity,FL,33772,1,[email protected],,,,,3701
3705,Giger,Tina,CFO,[email protected],Giger,Tina,CFO,[email protected],Net Technologies,23 Calab Rd,Suite 202,Calabas,CA,77777,1,[email protected],,,,,3705,
The RegEx I'm using doesn't work in that if there are empty columns it "slides" the next column over into it's place. What can I do to allow so that the following makes five columns for each row?
1,2,3,4,5
Test,,,Test,
Test,,Test,,
Here's my full code:
<%@ Page Language="VB" MasterPageFile="~/_Common/MasterPage.master" %>
<%@ Import Namespace="System.Data" %>
<%@ Import Namespace="System.IO" %>
<%@ Import Namespace="System.Text.RegularExpressions" %>
<%@ Import Namespace="System.Xml" %>
<script runat="server">
Private Sub Page_Load(ByVal sender As Object, ByVal e As System.EventArgs)
' call the parser
Dim dt As DataTable = ParseCSVFile(Server.MapPath("Documents/Test.csv"))
' bind the resulting DataTable to a DataGrid Web Control
GridView1.DataSource = dt
GridView1.DataBind()
End Sub
Public Function ParseCSV(ByVal inputString As String) As DataTable
Dim dt As New DataTable()
' declare the Regular Expression that will match versus the input string
Dim re As New Regex("((?<field>[^"",\r\n]+)|""(?<field>([^""]|"""")+)"")((?<rowbreak>,\r\n|\r\n|\n|$)|,)")
Dim colArray As New ArrayList()
Dim rowArray As New ArrayList()
Dim colCount As Integer = 0
Dim maxColCount As Integer = 0
Dim rowbreak As String = ""
Dim field As String = ""
Dim mc As MatchCollection = re.Matches(inputString)
For Each m As Match In mc
' retrieve the field and replace two double-quotes with a single double-quote
field = m.Result("${field}").Replace("""""", """")
rowbreak = m.Result("${rowbreak}")
If field.Length > 0 Then
colArray.Add(field)
colCount += 1
End If
If rowbreak.Length > 0 Then
' add the column array to the row Array List
rowArray.Add(colArray.ToArray())
' create a new Array List to hold the field values
colArray = New ArrayList()
If colCount > maxColCount Then
maxColCount = colCount
End If
colCount = 0
End If
Next
If rowbreak.Length = 0 Then
' this is executed when the last line doesn't
' end with a line break
rowArray.Add(colArray.ToArray())
If colCount > maxColCount Then
maxColCount = colCount
End If
End If
' create the columns for the table
For i As Integer = 0 To maxColCount - 1
dt.Columns.Add([String].Format("col{0:000}", i))
Next
' convert the row Array List into an Array object for easier access
Dim ra As Array = rowArray.ToArray()
For i As Integer = 0 To ra.Length - 1
' create a new DataRow
Dim dr As DataRow = dt.NewRow()
' convert the column Array List into an Array object for easier access
Dim ca As Array = DirectCast((ra.GetValue(i)), Array)
' add each field into the new DataRow
For j As Integer = 0 To ca.Length - 1
dr(j) = ca.GetValue(j)
Next
' add the new DataRow to the DataTable
dt.Rows.Add(dr)
Next
' in case no data was parsed, create a single column
If dt.Columns.Count = 0 Then
dt.Columns.Add("NoData")
End If
Return dt
End Function
Public Function ParseCSVFile(ByVal path As String) As DataTable
Dim inputString As String = ""
' check that the file exists before opening it
If File.Exists(path) Then
Dim sr As New StreamReader(path)
inputString = sr.ReadToEnd()
sr.Close()
End If
Return ParseCSV(inputString)
End Function
</script>
<asp:Content ID="Content1" ContentPlaceHolderID="head" Runat="Server">
</asp:Content>
<asp:Content ID="Content2" ContentPlaceHolderID="ContentPlaceHolder1" Runat="Server">
<asp:FileUpload ID="FileUpload1" runat="server" />
<asp:Button ID="btnUpload" runat="server" Text="Upload" />
<asp:GridView ID="GridView1" runat="server">
</asp:GridView>
</asp:Content>
I actually would prefer doing this with Linq but I couldn't find any examples that used CSV files that had quoted values (for values that contained commas in the value itself).
** UPDATE **
I changed the RegEx to this and it's getting me closer:
Dim re As New Regex("(?<field>,)|((?<field>[^"",\r\n]+)|""(?<field>([^""]|"""")+)"")(,|(?<rowbreak>\r\n|\n|$))")
now it puts all the data in the correct columns but there's just a comma in the empty columns