For my application I have to parse a CSV file using Erlang. The following is the code which parses CSV using Erlang:-
%% Reads the whole file named Fn and parses its contents as CSV.
%% Returns whatever parse/1 returns for the file's bytes.
%% Crashes with a badmatch if file:read_file/1 does not return {ok, _}.
parse_file(Fn) ->
    {ok, Bin} = file:read_file(Fn),
    %% The parser works on a list of byte values, not on the binary itself.
    Bytes = binary_to_list(Bin),
    parse(Bytes).
%% Parses Data (a list of characters) into a list of lines, in input order.
parse(Data) ->
    %% parse/2 prepends each parsed line to its accumulator, so the lines
    %% come back last-first and have to be reversed.
    ReversedLines = parse(Data, []),
    lists:reverse(ReversedLines).
%% Consumes the input one line at a time until nothing is left.
%% Lines holds the lines parsed so far, most recent first; it is returned
%% unchanged (still reversed) once the input is exhausted.
parse([], Lines) ->
    Lines;
parse(Remaining, Lines) ->
    {Fields, Rest} = parse_line(Remaining),
    parse(Rest, [Fields | Lines]).
%% Parses a single line from the front of Data.
%% Returns {Fields, Tail}: the line's fields in input order and the input
%% that remains after the line.
parse_line(Data) ->
    %% parse_line/2 collects the fields in reverse order.
    {ReversedFields, Rest} = parse_line(Data, []),
    Fields = lists:reverse(ReversedFields),
    {Fields, Rest}.
%% Parses the fields of one line, pushing each field onto Acc (so the fields
%% are returned in reverse order).
%%
%% Returns {ReversedFields, Tail}, where Tail is the input following the line
%% terminator (CRLF, LF or CR) or [] at end of input.
%%
%% Every comma separates exactly two fields, so empty fields are kept wherever
%% they occur: "a,,,b" yields four fields and "a," yields two. Treating commas
%% as skippable separators instead loses leading, trailing and consecutive
%% empty fields.
%%
%% A line terminator (or end of input) seen before any field is read ends the
%% line without adding a field, so a blank line contributes no fields.
parse_line([13, 10 | Data], Acc) -> {Acc, Data};
parse_line([10 | Data], Acc) -> {Acc, Data};
parse_line([13 | Data], Acc) -> {Acc, Data};
parse_line([], Acc) -> {Acc, []};
parse_line(Data, Acc) ->
    {Fld, Tail} = parse_field(Data),
    parse_line_after_field(Tail, [Fld | Acc]).

%% Decides what follows a field that has just been read.
parse_line_after_field([$, | Data], Acc) ->
    %% A comma always introduces another field, even when that field is empty
    %% (next character is a comma, a line terminator, or end of input).
    {Fld, Tail} = parse_field(Data),
    parse_line_after_field(Tail, [Fld | Acc]);
parse_line_after_field(Data, Acc) ->
    %% Line terminator or end of input finishes the line. Anything else (for
    %% example stray text after a closing quote) is read as a further field.
    parse_line(Data, Acc).
%% Parses one field from the front of Data.
%% A field that starts with a double quote is scanned by parse_fieldq/2
%% (starting after the quote); any other field is scanned by parse_field/2.
%% Returns {Field, Tail} with the field's characters in input order.
parse_field(Data) ->
    {ReversedField, Tail} =
        case Data of
            [$" | Quoted] -> parse_fieldq(Quoted, "");
            _ -> parse_field(Data, "")
        end,
    %% Both scanners accumulate characters in reverse order.
    {lists:reverse(ReversedField), Tail}.
%% Scans an unquoted field, pushing its characters onto Acc (so the field is
%% returned in reverse order).
%% Stops at a comma, CR, LF or end of input. The stopping character is not
%% consumed: it is left at the front of the returned remainder.
parse_field([Ch | _] = Rest, Acc) when Ch =:= $,; Ch =:= $\r; Ch =:= $\n ->
    {Acc, Rest};
parse_field([], Acc) ->
    {Acc, []};
parse_field([Ch | Rest], Acc) ->
    parse_field(Rest, [Ch | Acc]).
%% Scans the body of a quoted field; Data is expected to start just after the
%% opening quote. Characters are pushed onto Acc, so the field is returned in
%% reverse order.
%%
%% Returns {ReversedField, Tail}, where Tail is the input following the
%% closing quote.

%% A doubled quote ("") inside the field stands for one literal quote.
parse_fieldq([$", $" | Tail], Acc) -> parse_fieldq(Tail, [$" | Acc]);
%% A single quote closes the field.
parse_fieldq([$" | Tail], Acc) -> {Acc, Tail};
%% End of input before a closing quote: return what was read instead of
%% crashing with function_clause, matching how parse_field/2 treats end of
%% input.
parse_fieldq([], Acc) -> {Acc, []};
parse_fieldq([Ch | Tail], Acc) -> parse_fieldq(Tail, [Ch | Acc]).
This code works fine but has two issues:- 1 - The code parses using double quotes ("") and commas (,) to separate each value, but in the following example, where the First Name contains a double-quoted string within it, the parser creates one extra field.
"Type","First Name","Last Name","Email"
"Contact","Ashwani Garg ------"All Pain Will End."","","[email protected]"
result:-
[["contact"],["Ashwani Garg ------"],["All Pain Will End."],[],["[email protected]"]]
expected result:-
[["contact"],["Ashwani Garg ------All Pain Will End."],[],["[email protected]"]]
2 - For the following kind of CSV, which contains empty values, the parser truncates some of the values:-
First Name,Last Name,Middle Name,Name,Nickname,E-mail Address,Home Street,Home City,Home Postal Code,Home State,Home Country/Region,Home Phone,Home Fax,Mobile Phone,Personal Web Page,Business Street,Business City,Business Postal Code,Business State,Business Country/Region,Business Web Page,Business Phone,Business Fax,Pager,Company,Job Title,Department,Office Location,Notes
Affection,,,Affection,,,,,,,,+919845141544,,+919845141544,,,,,,,,,,,,,,,
result:-
[["Affection"],[],[],["Affection"],[],[],[],[],[],[],[],["+919845141544"],[],["+919845141544"],[],[],[],[],[],[],[]]
expected result:-
[["Affection"],[],[],["Affection"],[],[],[],[],[],[],[],["+919845141544"],[],["+919845141544"],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
Please help me. For reference, please use the following link:- http://ppolv.wordpress.com/2008/02/25/parsing-csv-in-erlang/