I need to strip the "label" off the front of strings, e.g.
note: this is a note
needs to return:
note
and
this is a note
I've produced the following code example but am having trouble with the regexes.
What code do I need in the two ???????? areas below so that I get the desired results shown in the comments?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace TestRegex8822
{
class Program
{
static void Main(string[] args)
{
List<string> lines = new List<string>();
lines.Add("note: this is a note");
lines.Add("test: just a test");
lines.Add("test:\t\t\tjust a test");
lines.Add("firstName: Jim"); //"firstName" IS a label because it does NOT contain a space
lines.Add("She said this to him: follow me."); //this is NOT a label since there is a space before the colon
lines.Add("description: this is the first description");
lines.Add("description:this is the second description"); //no space after colon
lines.Add("this is a line with no label");
foreach (var line in lines)
{
Console.WriteLine(StringHelpers.GetLabelFromLine(line));
Console.WriteLine(StringHelpers.StripLabelFromLine(line));
Console.WriteLine("--");
//note
//this is a note
//--
//test
//just a test
//--
//test
//just a test
//--
//firstName
//Jim
//--
//
//She said this to him: follow me.
//--
//description
//this is the first description
//--
//description
//this is the first description
//--
//
//this is a line with no label
//--
}
Console.ReadLine();
}
}
public static class StringHelpers
{
public static string GetLabelFromLine(this string line)
{
string label = line.GetMatch(@"^?:(\s)"); //???????????????
if (!label.IsNullOrEmpty())
return label;
else
return "";
}
public static string StripLabelFromLine(this string line)
{
return ...//???????????????
}
public static bool IsNullOrEmpty(this string line)
{
return String.IsNullOrEmpty(line);
}
}
public static class RegexHelpers
{
public static string GetMatch(this string text, string regex)
{
Match match = Regex.Match(text, regex);
if (match.Success)
{
string theMatch = match.Groups[0].Value;
return theMatch;
}
else
{
return null;
}
}
}
}
Added
@Keltex, I incorporated your idea as follows but it is not matching any of the text (all entries are blank), what do I need to tweak in the regex?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace TestRegex8822
{
class Program
{
static void Main(string[] args)
{
List<string> lines = new List<string>();
lines.Add("note: this is a note");
lines.Add("test: just a test");
lines.Add("test:\t\t\tjust a test");
lines.Add("firstName: Jim"); //"firstName" IS a label because it does NOT contain a space
lines.Add("first name: Jim"); //"first name" is not a label because it contains a space
lines.Add("description: this is the first description");
lines.Add("description:this is the second description"); //no space after colon
lines.Add("this is a line with no label");
foreach (var line in lines)
{
LabelLinePair llp = line.GetLabelLinePair();
Console.WriteLine(llp.Label);
Console.WriteLine(llp.Line);
Console.WriteLine("--");
}
Console.ReadLine();
}
}
public static class StringHelpers
{
public static LabelLinePair GetLabelLinePair(this string line)
{
Regex regex = new Regex(@"(?<label>.+):\s*(?<text>.+)");
Match match = regex.Match(line);
LabelLinePair labelLinePair = new LabelLinePair();
labelLinePair.Label = match.Groups["label"].ToString();
labelLinePair.Line = match.Groups["line"].ToString();
return labelLinePair;
}
}
public class LabelLinePair
{
public string Label { get; set; }
public string Line { get; set; }
}
}
SOLVED:
Ok, I got it to work, plus added a little hack to take care of the labels with spaces and it's exactly what I wanted, THANKS!
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace TestRegex8822
{
class Program
{
static void Main(string[] args)
{
List<string> lines = new List<string>();
lines.Add("note: this is a note");
lines.Add("test: just a test");
lines.Add("test:\t\t\tjust a test");
lines.Add("firstName: Jim"); //"firstName" IS a label because it does NOT contain a space
lines.Add("first name: Jim"); //"first name" is not a label because it contains a space
lines.Add("description: this is the first description");
lines.Add("description:this is the second description"); //no space after colon
lines.Add("this is a line with no label");
lines.Add("she said to him: follow me");
foreach (var line in lines)
{
LabelLinePair llp = line.GetLabelLinePair();
Console.WriteLine(llp.Label);
Console.WriteLine(llp.Line);
Console.WriteLine("--");
}
Console.ReadLine();
}
}
public static class StringHelpers
{
public static LabelLinePair GetLabelLinePair(this string line)
{
Regex regex = new Regex(@"(?<label>.+):\s*(?<text>.+)");
Match match = regex.Match(line);
LabelLinePair llp = new LabelLinePair();
llp.Label = match.Groups["label"].ToString();
llp.Line = match.Groups["text"].ToString();
if (llp.Label.IsNullOrEmpty() || llp.Label.Contains(" "))
{
llp.Label = "";
llp.Line = line;
}
return llp;
}
public static bool IsNullOrEmpty(this string line)
{
return String.IsNullOrEmpty(line);
}
}
public class LabelLinePair
{
public string Label { get; set; }
public string Line { get; set; }
}
}