using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Data;
using System.Text.RegularExpressions;
namespace pro.Classes
{
/// <summary>
/// Parses comma-separated text read from a <see cref="Stream"/> into a <see cref="DataTable"/>.
/// The first record supplies the column names; every later record becomes one row of strings.
/// Fields may be wrapped in double quotes, in which case they may contain commas, line breaks
/// and doubled quotes (<c>""</c>), the latter being read as a single literal quote.
/// </summary>
public class CSVParser
{
    private const char Delimiter = ',';
    private const char Quote = '"';

    private readonly Stream _stream;
    private string _TableName = "CSV";

    /// <summary>
    /// Creates a parser that reads CSV text from the given stream.
    /// </summary>
    /// <param name="aStream">Stream to read from. It is closed by <see cref="ParseToDataTable"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="aStream"/> is null.</exception>
    public CSVParser(Stream aStream)
    {
        if (aStream == null)
        {
            throw new ArgumentNullException("aStream");
        }

        _stream = aStream;
    }

    /// <summary>
    /// Name given to the table returned by <see cref="ParseToDataTable"/>. Defaults to "CSV".
    /// </summary>
    public string TableName
    {
        get
        {
            return _TableName;
        }
        set
        {
            _TableName = value;
        }
    }

    /// <summary>
    /// Splits one CSV record into its fields.
    /// </summary>
    /// <param name="source">Text of a single record. Line breaks in it are treated as field content.</param>
    /// <returns>
    /// The field values with enclosing quotes removed and doubled quotes collapsed.
    /// An empty <paramref name="source"/> yields an empty array. A quoted field that is never
    /// closed extends to the end of <paramref name="source"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    protected string[] BreakCSV(string source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        if (source.Length == 0)
        {
            return new string[0];
        }

        RecordSplitter splitter = new RecordSplitter();
        splitter.Append(source);
        return splitter.Finish();
    }

    /// <summary>
    /// Reads the whole stream and converts it into a table of string columns.
    /// The stream is closed when this method returns or throws.
    /// </summary>
    /// <returns>
    /// A table named <see cref="TableName"/>. An empty stream produces a table without columns
    /// or rows. Columns for which a record supplies no field are left unset in that row.
    /// </returns>
    /// <exception cref="InvalidDataException">
    /// A record has a non-empty field beyond the last column defined by the header record.
    /// </exception>
    /// <exception cref="DuplicateNameException">The header record repeats a column name.</exception>
    public DataTable ParseToDataTable()
    {
        DataTable result = new DataTable();
        result.TableName = TableName;

        // Disposing the reader also closes the underlying stream, even when parsing fails.
        using (StreamReader reader = new StreamReader(_stream))
        {
            string[] columns;
            if (!TryReadRecord(reader, out columns))
            {
                return result;
            }

            foreach (string name in columns)
            {
                DataColumn column = new DataColumn(name, typeof(string));
                column.ColumnMapping = MappingType.Attribute;
                result.Columns.Add(column);
            }

            int recordNumber = 1; // the header is record 1
            string[] data;
            while (TryReadRecord(reader, out data))
            {
                recordNumber++;
                DataRow row = result.NewRow();
                for (int i = 0; i < data.Length; i++)
                {
                    if (i < columns.Length)
                    {
                        row[i] = data[i];
                    }
                    else if (data[i].Length != 0)
                    {
                        // Surplus empty fields (e.g. from trailing delimiters) are tolerated;
                        // surplus data would otherwise be lost, so it is reported instead.
                        throw new InvalidDataException(string.Format(
                            System.Globalization.CultureInfo.InvariantCulture,
                            "CSV record {0} has {1} fields but the header defines only {2} columns.",
                            recordNumber,
                            data.Length,
                            columns.Length));
                    }
                }

                result.Rows.Add(row);
            }
        }

        return result;
    }

    /// <summary>
    /// Reads the next record, pulling in further lines while a quoted field is still open.
    /// Line breaks inside a quoted field are stored as "\n" regardless of the original terminator.
    /// </summary>
    /// <param name="reader">Reader positioned at the start of a record.</param>
    /// <param name="fields">Receives the fields; an empty array for an empty line.</param>
    /// <returns>False when the reader is already at the end of its input; otherwise true.</returns>
    private static bool TryReadRecord(TextReader reader, out string[] fields)
    {
        string line = reader.ReadLine();
        if (line == null)
        {
            fields = null;
            return false;
        }

        if (line.Length == 0)
        {
            fields = new string[0];
            return true;
        }

        RecordSplitter splitter = new RecordSplitter();
        splitter.Append(line);
        while (splitter.InQuotes)
        {
            line = reader.ReadLine();
            if (line == null)
            {
                // Input ended inside a quoted field: keep what was read so far.
                break;
            }

            splitter.Append("\n");
            splitter.Append(line);
        }

        fields = splitter.Finish();
        return true;
    }

    /// <summary>
    /// Incremental splitter for a single record. Text can be fed in several pieces so that a
    /// quoted field spanning multiple lines is scanned only once.
    /// </summary>
    private sealed class RecordSplitter
    {
        private readonly List<string> _fields = new List<string>();
        private readonly StringBuilder _field = new StringBuilder();
        private bool _atFieldStart = true;
        private bool _inQuotes;

        /// <summary>True while the text fed so far ends inside an unclosed quoted field.</summary>
        public bool InQuotes
        {
            get { return _inQuotes; }
        }

        /// <summary>Consumes the next piece of the record.</summary>
        public void Append(string text)
        {
            for (int i = 0; i < text.Length; i++)
            {
                char c = text[i];
                if (_inQuotes)
                {
                    if (c != Quote)
                    {
                        _field.Append(c);
                    }
                    else if (i + 1 < text.Length && text[i + 1] == Quote)
                    {
                        // A doubled quote inside a quoted field is one literal quote.
                        _field.Append(Quote);
                        i++;
                    }
                    else
                    {
                        _inQuotes = false;
                    }
                }
                else if (c == Delimiter)
                {
                    _fields.Add(_field.ToString());
                    _field.Length = 0;
                    _atFieldStart = true;
                }
                else if (c == Quote && _atFieldStart)
                {
                    // A quote only opens a quoted field as the first character of the field;
                    // anywhere else it is kept as ordinary content.
                    _inQuotes = true;
                    _atFieldStart = false;
                }
                else
                {
                    _field.Append(c);
                    _atFieldStart = false;
                }
            }
        }

        /// <summary>Closes the field in progress and returns all fields of the record.</summary>
        public string[] Finish()
        {
            _fields.Add(_field.ToString());
            return _fields.ToArray();
        }
    }
}
}
views: 365
answers: 3
It looks like you're trying to parse CSV; see "A Class File which parses csv and returns a dataSet as a result ASP.Net" for a related question, or look at CsvReader.
I would have to look very hard at that regex to see if it covers all the edge cases, but I'm pretty confident that CsvReader does, including:
- alternative separators (think: French Excel, tsv, pipe-delimited, etc)
- quote-escaped
- multi-line
etc
Oh - and if you're using a Regex like that, it might as well be static readonly, and pre-compiled (RegexOptions.Compiled).
I also came across http://kbcsv.codeplex.com/ which has an excellent library for importing CSV files. I'd looked for hours before I came across this. I've tried it with a number of formats and must tip my hat to the gent who built it and made it available. This has saved me days of time.
Instead of using a regex or writing your own parser, why not use the TextFieldParser class in the Microsoft.VisualBasic.FileIO namespace? I used it from C# for something at work, with good results. You would then only have to write the wrapper code to return the data in the form you want.