Curious if this can be simplified...
/// <summary>
/// Lazily splits <paramref name="str"/> on every occurrence of <paramref name="sep"/>
/// that is not escaped by a backslash, not inside a quoted run, and not inside
/// square brackets.
/// </summary>
/// <param name="str">The text to split.</param>
/// <param name="sep">The separator character; defaults to a comma.</param>
/// <returns>
/// The segments in order of appearance. Segments are returned verbatim: quotes,
/// brackets, backslashes and surrounding whitespace are kept, and the separator
/// itself is dropped. At least one segment is always produced.
/// </returns>
/// <remarks>
/// A backslash escapes only the character immediately after it. A quoted run is
/// opened by <c>"</c> or <c>'</c> and closed only by the same quote character.
/// Brackets are not nested: the first unquoted, unescaped <c>]</c> ends the
/// bracketed state regardless of how many <c>[</c> preceded it.
/// </remarks>
internal static IEnumerable<string> Split(string str, char sep = ',')
{
    int segmentStart = 0;
    bool inQuotes = false;
    bool inBrackets = false;
    bool afterBackslash = false;
    char openingQuote = '\0';

    for (int pos = 0; pos < str.Length; pos++)
    {
        char current = str[pos];

        // Remember whether this character was escaped, then update the flag for
        // the next one: only an unescaped backslash starts a new escape.
        bool wasEscaped = afterBackslash;
        afterBackslash = current == '\\' && !wasEscaped;

        // Escaped characters and backslashes never change any other state.
        if (wasEscaped || current == '\\')
        {
            continue;
        }

        switch (current)
        {
            case '[':
            case ']':
                // Brackets are plain text while inside quotes.
                if (!inQuotes)
                {
                    inBrackets = current == '[';
                }
                break;

            case '"':
            case '\'':
                if (!inQuotes)
                {
                    inQuotes = true;
                    openingQuote = current;
                }
                else if (openingQuote == current)
                {
                    // Only the matching quote character closes the run.
                    inQuotes = false;
                }
                break;

            default:
                // The separator is checked last so that the special characters
                // above take priority even if one of them is passed as sep.
                if (current == sep && !inQuotes && !inBrackets)
                {
                    yield return str.Substring(segmentStart, pos - segmentStart);
                    segmentStart = pos + 1;
                }
                break;
        }
    }

    // Whatever follows the last separator (possibly empty) is the final segment.
    yield return str.Substring(segmentStart);
}
I wrote this method to split on commas that are not inside [], are not quoted, and are not escaped. Is this inherently a tricky problem, or did I take a dumb approach?
Input:
foreach(var sel in SharpQuery.SplitCommas("\"comma, in quotes\", comma[in,brackets], comma[in \"quotes, and brackets\"], \"woah, 'nelly,' \\\"now you,re [talking, crazy\\\"\"")) {
Console.WriteLine(sel);
}
Expected output:
"comma, in quotes"
comma[in,brackets]
comma[in "quotes, and brackets"]
"woah, 'nelly,' \"now you,re [talking, crazy\""