views:

1000

answers:

8

I have a method that gets a number of objects of this class

class Range<T>
{
    public T Start;
    public T End;
}

In my case T is DateTime, but lets use int for simplicity. I would like a method that collapses those ranges into ones that cover the same "area" but that do not overlap.

So if I had the following ranges

  • 1 to 5
  • 3 to 9
  • 11 to 15
  • 12 to 14
  • 13 to 20

The method should give me

  • 1 to 9
  • 11 to 20

Guess it would be called a union? I imagine the method signature could look something like this:

public static IEnumerable<Range<T>> Collapse<T>(
    this IEnumerable<Range<T>>, 
    IComparable<T> comparer)
{
    ...
}

I have looked at some other questions here that are kind of similar, but I haven't found an implementation of this yet. This answer and some other answers to the same question describes algorithms, but I am not quite sure if I understand the algorithms. Not especially good at implementing algorithms either, so I was hoping someone here could help me out.

+1  A: 

This could probably be optimized...

using System.Collections.Generic;
using System.Linq;
using System;
static class Range
{
    public static Range<T> Create<T>(T start, T end)
    {
        return new Range<T>(start, end);
    }
    public static IEnumerable<Range<T>> Normalize<T>(
        this IEnumerable<Range<T>> ranges)
    {
        return Normalize<T>(ranges, null);
    }
    public static IEnumerable<Range<T>> Normalize<T>(
        this IEnumerable<Range<T>> ranges, IComparer<T> comparer)
    {
        var list = ranges.ToList();
        if (comparer == null) comparer = Comparer<T>.Default;
        for (int i = list.Count - 1; i >= 0; i--)
        {
            var item = list[i];

            for (int j = 0; j < i; j++)
            {
                Range<T>? newValue = TryMerge<T>(comparer, item, list[j]);

                // did we find a useful transformation?
                if (newValue != null)
                {
                    list[j] = newValue.GetValueOrDefault();
                    list.RemoveAt(i);
                    break;
                }
            }
        }
        list.Sort((x, y) =>
        {
            int t = comparer.Compare(x.Start, y.Start);
            if (t == 0) t = comparer.Compare(x.End, y.End);
            return t;
        });
        return list.AsEnumerable();
    }

    private static Range<T>? TryMerge<T>(IComparer<T> comparer, Range<T> item, Range<T> other)
    {
        if (comparer.Compare(other.End, item.Start) == 0)
        { // adjacent ranges
            return new Range<T>(other.Start, item.End);
        }
        if (comparer.Compare(item.End, other.Start) == 0)
        { // adjacent ranges
            return new Range<T>(item.Start, other.End);
        }
        if (comparer.Compare(item.Start, other.Start) <= 0
            && comparer.Compare(item.End, other.End) >= 0)
        { // item fully swalls other
            return item;
        }
        if (comparer.Compare(other.Start, item.Start) <= 0
            && comparer.Compare(other.End, item.End) >= 0)
        { // other fully swallows item
            return other;
        }
        if (comparer.Compare(item.Start, other.Start) <= 0
            && comparer.Compare(item.End, other.Start) >= 0
            && comparer.Compare(item.End, other.End) <= 0)
        { // partial overlap
            return new Range<T>(item.Start, other.End);
        }
        if (comparer.Compare(other.Start, item.Start) <= 0
             && comparer.Compare(other.End, item.Start) >= 0
            && comparer.Compare(other.End, item.End) <= 0)
        { // partial overlap
            return new Range<T>(other.Start, item.End);
        }
        return null;
    }
}
public struct Range<T>
{
    private readonly T start, end;
    public T Start { get { return start; } }
    public T End { get { return end; } }
    public Range(T start, T end)
    {
        this.start = start;
        this.end = end;
    }
    public override string ToString()
    {
        return start + " to " + end;
    }
}

static class Program
{
    static void Main()
    {
        var data = new[] 
        {
            Range.Create(1,5), Range.Create(3,9),
            Range.Create(11,15), Range.Create(12,14),
            Range.Create(13,20)
        };
        var result = data.Normalize();
        foreach (var item in result)
        {
            Console.WriteLine(item);
        }
    }
}
Marc Gravell
that was fast man
grenade
that duplicated code 'smells' ... :)
Mitch Wheat
@Mitch - yes, I'd probably refactor into a TryMerge method, i.e. if(TryMerge(other, item, out result)) {list[j] = result; list.RemoveAt(i));}
Marc Gravell
This one seems to be working nicely. Do you have a smart way of doing so that ranges that are right next to each other would be merged as well? So if you had (1,5) and (6,9) you would get (1,9)? Of course this would be a bit more complicated with dates perhaps... could maybe go through the list afterwards and check all the end points if they are neighbours or something...
Svish
And even more difficult with strings...
Svish
What would go in that TryMerge method?
Svish
The (1,5) (6,9) => (1,9) would need special handling, as most values are continuous - i.e. there is a definite gap between 5 and 6. With TryMerge - just a lot of the bumph from above; actually, there's a nicer way to simplify it - will update...
Marc Gravell
+3  A: 
static void Main(string[] args) {
    List<Range<int>> ranges = new List<Range<int>>() 
    {    
     new Range<int>(3,9),
     new Range<int>(1,5),
     new Range<int>(11,15),
     new Range<int>(12,14),
     new Range<int>(13,20),
    };

    var orderedRanges = ranges.OrderBy(r => r.Start);
    var lastRange = new Range<int>(orderedRanges.First().Start, orderedRanges.First().End);

    List<Range<int>> newranges = new List<Range<int>>();   
    newranges.Add(lastRange);

    foreach (var range in orderedRanges.Skip(1)) {
     if (range.Start >= lastRange.Start && range.Start <= lastRange.End && range.End > lastRange.End) {
      lastRange.End = range.End;
     }
     else if (range.Start > lastRange.End) {
      lastRange = new Range<int>(range.Start, range.End);
      newranges.Add(lastRange);
     }
    }

    foreach (var r in newranges) {
     Console.WriteLine("{0}, {1}", r.Start, r.End);
    }
}

Something like this. Didn't verify that it works with all inputs.

aquinas
+2  A: 

A Python solution for the non-verbosephile:

ranges = [
  (11, 15),
  (3, 9),
  (12, 14),
  (13, 20),
  (1, 5)]

result = []
cur = None
for start, stop in sorted(ranges): # sorts by start
  if cur is None:
    cur = (start, stop)
    continue
  cStart, cStop = cur
  if start <= cStop:
    cur = (cStart, max(stop, cStop))
  else:
    result.append(cur)
    cur = (start, stop)
result.append(cur)

print result
yairchu
+1 for no need to scroll.
Peter Recore
+1  A: 

The idea of collapsing a list just screamed out "reduce" to me. It didn't end up quite as elegant as I had hoped though.

def collapse(output,next_range):
    last_start,last_end = output[-1]
    next_start, next_end = next_range
    if (next_start <= last_end):
        output[-1] = (last_start, max(next_end, last_end))
    else:
        output.append(next_range)
    return output

ranges = [
  (11, 15),
  (3, 9),
  (12, 14),
  (13, 20),
  (1, 5)]

ranges.sort()
result = [ranges.pop(0)]
reduce(collapse, ranges,result)

print result

thanks to yairchu for typing in the data so I could cut and paste it :)

Peter Recore
A: 

Here is a simple looping impelmentation, but at least is clear.

  • It works for DateTime as well as Int, in my simple tests
  • Most of the complexity is in the Overlap/Combine methods on the range
  • The algorithm is actually easily understandable, no floating vars
  • Adds some ability to the Range class which is probably useful in general

-- this line intentionally meaningless, to fix markdown problem --

public static class CollapseRange
{
    public static IEnumerable<Range<T>> Collapse<T>(this IEnumerable<Range<T>> me)
        where T:struct
    {
        var result = new List<Range<T>>();
        var sorted = me.OrderBy(x => x.Start).ToList();
        do {
            var first = sorted.FirstOrDefault();
            sorted.Remove(first);
            while (sorted.Any(x => x.Overlap(first))) {
                var other = sorted.FirstOrDefault(x => x.Overlap(first));
                first = first.Combine(other);
                sorted.Remove(other);
            }
            result.Add(first);
        } while (sorted.Count > 0);
        return result;
    }
}

[DebuggerDisplay("Range {Start} - {End}")]
public class Range<T> where T : struct
{
    public T Start { set; get; }
    public T End { set; get; }
    public bool Overlap(Range<T> other)
    {
        return (Within(other.Start) || Within(other.End) || other.Within(this.Start) || other.Within(this.End));
    }
    public bool Within(T point)
    {
        var Comp = Comparer<T>.Default;
        var st = Comp.Compare(point, this.Start);
        var ed = Comp.Compare(this.End, point);
        return (st >= 0 && ed >= 0);
    }
    /// <summary>Combines to ranges, updating the current range</summary>
    public void Merge(Range<T> other)
    {
        var Comp = Comparer<T>.Default;
        if (Comp.Compare(this.Start, other.Start) > 0) this.Start = other.Start;
        if (Comp.Compare(other.End, this.End) > 0) this.End = other.End;
    }
    /// <summary>Combines to ranges, returning a new range in their place</summary>
    public Range<T> Combine(Range<T> other)
    {
        var Comp = Comparer<T>.Default;
        var newRange = new Range<T>() { Start = this.Start, End = this.End };
        newRange.Start = (Comp.Compare(this.Start, other.Start) > 0) ? other.Start : this.Start;
        newRange.End = (Comp.Compare(other.End, this.End) > 0) ? other.End : this.End;
        return newRange;
    }
}
Andrew Backer
Never seen that DebuggerDisplay attribute before. That is just brilliant :D
Svish
+6  A: 

This seems to works and is easy to understand.

    public static IEnumerable<Range<T>> Collapse<T>(this IEnumerable<Range<T>> me, IComparer<T> comparer)
    {
        List<Range<T>> orderdList = me.OrderBy(r => r.Start).ToList();
        List<Range<T>> newList = new List<Range<T>>();

        T max = orderdList[0].End;
        T min = orderdList[0].Start;

        foreach (var item in orderdList.Skip(1))
        {
            if (comparer.Compare(item.End, max) > 0 && comparer.Compare(item.Start, max) > 0)
            {
                newList.Add(new Range<T> { Start = min, End = max });
                min = item.Start;
            }
            max = comparer.Compare(max, item.End) > 0 ? max : item.End;
        }
        newList.Add(new Range<T>{Start=min,End=max});

        return newList;
    }


Here is the variation which I mentioned in the comments. It's basically the same thing, but with some checking and yielding of the results instead of collecting in a list before returning.

    public static IEnumerable<Range<T>> Collapse<T>(this IEnumerable<Range<T>> ranges, IComparer<T> comparer)
    {
        if(ranges == null || !ranges.Any())
            yield break;

        if (comparer == null)
            comparer = Comparer<T>.Default;

        var orderdList = ranges.OrderBy(r => r.Start);
        var firstRange = orderdList.First();

        T min = firstRange.Start;
        T max = firstRange.End;

        foreach (var current in orderdList.Skip(1))
        {
            if (comparer.Compare(current.End, max) > 0 && comparer.Compare(current.Start, max) > 0)
            {
                yield return Create(min, max);
                min = current.Start;
            }
            max = comparer.Compare(max, current.End) > 0 ? max : current.End;
        }
        yield return Create(min, max);
    }
Gary W
You should check if the list is empty, other than that, good approach.
Lasse V. Karlsen
Yeah, I went with a slight variation of this solution. Thanks =)
Svish
A: 

Tossing another hat into the ring. Very much the same implementation as Gary W's (from which I got the sorted list approach), but done as a test case and with some helpful functions added to the Range class.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;

import edu.emory.mathcs.backport.java.util.Collections;

import junit.framework.TestCase;

public class Range2Test extends TestCase {
    public void testCollapse() throws Exception {
     Set<Range<Integer>> set = new HashSet<Range<Integer>>();
     set.add(new Range<Integer>(1, 5));
     set.add(new Range<Integer>(3, 9));
     set.add(new Range<Integer>(11, 15));
     set.add(new Range<Integer>(12, 14));
     set.add(new Range<Integer>(13, 20));
     Set<Range<Integer>> expected = new HashSet<Range<Integer>>();
     expected.add(new Range<Integer>(1, 9));
     expected.add(new Range<Integer>(11, 20));
     assertEquals(expected, collapse(set));
    }

    private static <T extends Comparable<T>> Set<Range<T>> collapse(Set<Range<T>> ranges) {
     if (ranges == null)
      return null;
     if (ranges.size() < 2)
      return new HashSet<Range<T>>(ranges);
     ArrayList<Range<T>> list = new ArrayList<Range<T>>(ranges);
     Collections.sort(list);
     Set<Range<T>> result = new HashSet<Range<T>>();
     Range<T> r = list.get(0);
     for (Range<T> range : list) 
      if (r.overlaps(range)) {
       r = r.union(range);
      } else {
       result.add(r);
       r = range;
      }
     result.add(r);
     return result;
    }

    private static class Range<T extends Comparable<T>> implements Comparable<Range<T>> {
     public Range(T start, T end) {
      if (start == null || end == null)
       throw new NullPointerException("Range requires start and end.");
      this.start = start;
      this.end = end;
     }
     public T start;
     public T end;

     private boolean contains(T t) {
      return start.compareTo(t) <= 0 && t.compareTo(end) <= 0;
     }

     public boolean overlaps(Range<T> that) {
      return this.contains(that.start) || that.contains(this.start);
     }

     public Range<T> union(Range<T> that) {
      T start = this.start.compareTo(that.start) < 0 ? this.start : that.start;
      T end = this.end.compareTo(that.end) > 0 ? this.end : that.end;
      return new Range<T>(start, end);
     }

     public String toString() {
      return String.format("%s - %s", start, end);
     }

     public int hashCode() {
      final int prime = 31;
      int result = 1;
      result = prime * result + end.hashCode();
      result = prime * result + start.hashCode();
      return result;
     }

     @SuppressWarnings("unchecked")
     public boolean equals(Object obj) {
  if (this == obj)     return true;
  if (obj == null)     return false;
  if (getClass() != obj.getClass()) return false;
  Range<T> that = (Range<T>) obj;
  return end.equals(that.end) && start.equals(that.start);
     }

     public int compareTo(Range<T> that) {
      int result = this.start.compareTo(that.start);
      if (result != 0)
       return result;
      return this.end.compareTo(that.end);
     }
    }
}
Carl Manaster
A: 

A ruby version. Sort the ranges before merge seems to be a good idea.

def merge a , b
    return b if a.nil?
    if b.begin <= a.end
        (a.begin..b.end)
    el
        [a , b ]     #no overlap
    end
end

ranges = [(1..5),(11..15),(3..9),(12..14),(13..20)]
sorted_ranges = ranges.sort_by {|r| r.begin}   #sorted by the start of the range

merged_ranges = sorted_ranges.inject([]) do |m , r|
       last = m.pop
       m << merge(last , r)   
       m.flatten
end

puts merged_ranges
pierr