views:

80

answers:

2

Question: What does a .NET regex string for extracting IPv6 addresses look like?

I can get it to extract a simple IPv6 address like "1050:0:0:0:5:600:300c:326b", but not the double-colon (compressed) format, e.g. "ff06::c3".

My problem is that it should extract a 0 for every value omitted at the "::". How do I do that?

Below are my code and a description of the address formats.

Specify IPv6 addresses by omitting leading zeros.
For example, IPv6 address 1050:0000:0000:0000:0005:0600:300c:326b
may be written as 1050:0:0:0:5:600:300c:326b.
Double colon
Specify IPv6 addresses by using double colons (::) in place of a series of zeros.
For example, IPv6 address ff06:0:0:0:0:0:0:c3
may be written as ff06::c3.
Double colons may be used only once in an IP address.

        // Two sample inputs; the second assignment overwrites the first, so only the
        // fully written-out address is actually tested below.
        strInputString = "ff06::c3";
        strInputString = "1050:0000:0000:0000:0005:0600:300c:326b";

        // Seven "hex group + colon" repetitions followed by a final hex group (1-4 hex digits each).
        // The pattern has no alternative for the "::" shorthand.
        string strPattern = "([A-Fa-f0-9]{1,4}:){7}([A-Fa-f0-9]{1,4})";
        //strPattern = @"\A(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\z";

        //strPattern = @"(\A([0-9a-f]{1,4}:){1,1}(:[0-9a-f]{1,4}){1,6}\Z)|(\A([0-9a-f]{1,4}:){1,2}(:[0-9a-f]{1,4}){1,5}\Z)|(\A([0-9a-f]{1,4}:){1,3}(:[0-9a-f]{1,4}){1,4}\Z)|(\A([0-9a-f]{1,4}:){1,4}(:[0-9a-f]{1,4}){1,3}\Z)|(\A([0-9a-f]{1,4}:){1,5}(:[0-9a-f]{1,4}){1,2}\Z)|(\A([0-9a-f]{1,4}:){1,6}(:[0-9a-f]{1,4}){1,1}\Z)|(\A(([0-9a-f]{1,4}:){1,7}|:):\Z)|(\A:(:[0-9a-f]{1,4}){1,7}\Z)|(\A((([0-9a-f]{1,4}:){6})(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})\Z)|(\A(([0-9a-f]{1,4}:){5}[0-9a-f]{1,4}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})\Z)|(\A([0-9a-f]{1,4}:){5}:[0-9a-f]{1,4}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\Z)|(\A([0-9a-f]{1,4}:){1,1}(:[0-9a-f]{1,4}){1,4}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\Z)|(\A([0-9a-f]{1,4}:){1,2}(:[0-9a-f]{1,4}){1,3}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\Z)|(\A([0-9a-f]{1,4}:){1,3}(:[0-9a-f]{1,4}){1,2}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\Z)|(\A([0-9a-f]{1,4}:){1,4}(:[0-9a-f]{1,4}){1,1}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\Z)|(\A(([0-9a-f]{1,4}:){1,5}|:):(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\Z)|(\A:(:[0-9a-f]{1,4}){1,5}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\Z) ";
        //strPattern = @"/^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*$/";
        //strPattern = @"(:?[0-9a-fA-F]{1,4}:){7}([0-9a-fA-F]{1,4})\z";
        //strPattern = @"\A((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)\z";


        //strPattern = @"\A((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}:)*)(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\z";
        //strPattern = @"/^(?:(?:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9](?::|$)){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))$/i";

        // Validation regex: the pattern wrapped in ^...$ so that the whole input has to match.
        System.Text.RegularExpressions.Regex reValidationRule = new System.Text.RegularExpressions.Regex("^" + strPattern + "$");


        if (reValidationRule.Match(strInputString).Success) // If matching pattern
        {
            // Match a second time, now with the bare (un-anchored) pattern, to obtain the groups.
            System.Text.RegularExpressions.Match maResult = System.Text.RegularExpressions.Regex.Match(strInputString, strPattern);
            // Console.WriteLine(maResult.Groups.Count)

            // NOTE(review): astrReturnValues is never read or written in the code shown here.
            string[] astrReturnValues = new string[4];


            System.Text.RegularExpressions.GroupCollection gc = maResult.Groups;
            System.Text.RegularExpressions.CaptureCollection cc;

            int counter;
            //System.Web.Script.Serialization.JavaScriptSerializer jssJSONserializer  = new System.Web.Script.Serialization.JavaScriptSerializer();
            //Console.WriteLine(jssJSONserializer.Serialize());
            // Loop through each group.
            // Group 0 is the entire match; the following groups are the pattern's parenthesised groups.
            for (int i = 0; i < gc.Count; i++)
            {
                Console.WriteLine("Group: {0}", i);
                // A group under a quantifier such as {7} records one capture per repetition.
                cc = gc[i].Captures;
                counter = cc.Count;

                // Print number of captures in this group.
                Console.WriteLine("Captures count = " + counter.ToString());

                // Loop through each capture in group.
                for (int ii = 0; ii < counter; ii++)
                {
                    Console.WriteLine("Capture: {0}", ii);
                    // Print capture and position.
                    Console.WriteLine(cc[ii] + "   Starts at character " +
                        cc[ii].Index);
                }
            }
A: 

Regex.Replace() may help here. You can use it like this to get a new string in which every "::" has been replaced with ":0:":

// Build the sample text, substitute ":0:" for each "::", then print the result.
string sample = "test::me";
string expanded = Regex.Replace(sample, @"::", ":0:");
Console.WriteLine(expanded);

I tested that out with LINQPad and it worked just fine for me.

jlafay
The only problem is you don't know HOW MANY "0:" are replaced by ::
Quandary
OK, sorry about that. I thought you were encountering "::" and needed to replace it with ":0:".
jlafay
+2  A: 
    /// <summary>
    /// Splits a textual IPv6 address into its eight 16-bit groups, each returned as four
    /// hexadecimal digits: leading zeros are restored and a "::" is expanded into the
    /// "0000" groups it stands for.
    /// </summary>
    /// <param name="input">
    /// The address, either fully written out ("1050:0:0:0:5:600:300c:326b") or compressed
    /// with a single "::" anywhere in the address ("ff06::c3", "::1", "fe80::", "::").
    /// Embedded dotted-decimal IPv4 suffixes are not supported.
    /// </param>
    /// <returns>
    /// Exactly eight strings in address order. Letter case of the hex digits is kept as given.
    /// This is an iterator, so the input is only examined once the result is enumerated.
    /// </returns>
    /// <exception cref="FormatException">
    /// Thrown during enumeration when the input is not a well-formed address of eight groups.
    /// </exception>
    private static IEnumerable<string> Parse(string input)
    {
        const int totalGroups = 8;
        const string partPattern = @"([A-Fa-f0-9]{1,4})";

        // Full form: exactly eight groups. Group 1 = first part, group 2 captures = the other seven.
        string longPattern = string.Format(@"{0} (?:\:{0}){{7}}", partPattern);

        // Compressed form: optional parts on either side of one "::".
        // Groups 3/4 = parts before "::", groups 5/6 = parts after it.
        string compactPattern = string.Format(@"(?:{0} (?:\:{0})*)? \:\: (?:{0} (?:\:{0})*)?", partPattern);

        string completePattern = string.Format(@"^{0}$ | ^{1}$", longPattern, compactPattern);
        var match = Regex.Match(input, completePattern, RegexOptions.IgnorePatternWhitespace);

        if (!match.Success)
        {
            throw new FormatException("No match");
        }

        if (match.Groups[1].Success)
        {
            yield return match.Groups[1].Value.PadLeft(4, '0');
            foreach (Capture capture in match.Groups[2].Captures)
            {
                yield return capture.Value.PadLeft(4, '0');
            }

            yield break;
        }

        // Number of explicit parts on each side of "::" (zero when that side is empty).
        int leadingCount = match.Groups[3].Success ? 1 + match.Groups[4].Captures.Count : 0;
        int trailingCount = match.Groups[5].Success ? 1 + match.Groups[6].Captures.Count : 0;

        // "::" must stand for at least one all-zero group, otherwise the address has too many parts.
        int zeroCount = totalGroups - leadingCount - trailingCount;
        if (zeroCount < 1)
        {
            throw new FormatException("Too many groups around \"::\"");
        }

        //First part
        if (match.Groups[3].Success)
        {
            yield return match.Groups[3].Value.PadLeft(4, '0');
            foreach (Capture capture in match.Groups[4].Captures)
            {
                yield return capture.Value.PadLeft(4, '0');
            }
        }

        //:: block
        for (int i = 0; i < zeroCount; ++i)
        {
            yield return "0000";
        }

        //Second part
        if (match.Groups[5].Success)
        {
            yield return match.Groups[5].Value.PadLeft(4, '0');
            foreach (Capture capture in match.Groups[6].Captures)
            {
                yield return capture.Value.PadLeft(4, '0');
            }
        }
    }

Something like this should work. The basic idea is just to split the case of the address containing :: and the rest into two different patterns. I used the assumption that :: can never be at the beginning or end so if that doesn't hold you'll have to modify the patterns a bit.

alun