SortedDictionary
(.NET 2+) or SortedSet
(.NET 4 only) is probably what you want. They are tree structures.
SortedList
is a dumb class which is no different from List
structurally.
However, it is still not entirely clear to me why you need this list as sorted.
Maybe if you could elaborate on this matter we could find a solution where you don't need sorting at all. For example a simple HashSet
could do. It is faster at both lookups and insertions than SortedList
or any of the tree structures if hashing is done properly.
Ok, now when it is clear to me that you wanted sorted lists merging, I can try to write an implementation.
At first, I implemented merging using SortedDictionary
to store heads of all the arrays. At each iteration I removed the smallest element from the dictionary and added the next one from the same array. Performance tests showed that overhead of SortedDictionary is huge, so that it is almost impossible to make it faster than simple concatenation+sorting. It even struggles to match SortedList
performance on small tests.
Then I replaced SortedDictionary
with custom-made binary heap implementation. Performance improvement was tremendous (more than 6 times). This Heap implementation even manages to beat .Distinct()
(which is usually the fastest) in some tests.
Here is my code:
class Heap<T>
{
public Heap(int limit, IComparer<T> comparer)
{
this.comparer = comparer;
data = new T[limit];
}
int count = 0;
T[] data;
public void Add(T t)
{
data[count++] = t;
promote(count-1);
}
IComparer<T> comparer;
public int Count { get { return count; } }
public T Pop()
{
T result = data[0];
fill(0);
return result;
}
bool less(T a, T b)
{
return comparer.Compare(a,b)<0;
}
void fill(int index)
{
int child1 = index*2+1;
int child2 = index*2+2;
if(child1 >= Count)
{
data[index] = data[--count];
if(index!=count)
promote(index);
}
else
{
int bestChild = child1;
if(child2 < Count && less(data[child2], data[child1]))
{
bestChild = child2;
}
data[index] = data[bestChild];
fill(bestChild);
}
}
void promote(int index)
{
if(index==0)
return;
int parent = (index-1)/2;
if(less(data[index], data[parent]))
{
T tmp = data[parent];
data[parent] = data[index];
data[index] = tmp;
promote(parent);
}
}
}
struct ArrayCursor<T>
{
public T [] Array {get;set;}
public int Index {get;set;}
public bool Finished {get{return Array.Length == Index;}}
public T Value{get{return Array[Index];}}
}
class ArrayComparer<T> : IComparer<ArrayCursor<T>>
{
IComparer<T> comparer;
public ArrayComparer(IComparer<T> comparer)
{
this.comparer = comparer;
}
public int Compare (ArrayCursor<T> a, ArrayCursor<T> b)
{
return comparer.Compare(a.Value, b.Value);
}
}
static class HeapMerger
{
public static IEnumerable<T> MergeUnique<T>(this T[][] arrays)
{
bool first = true;
T last = default(T);
IEqualityComparer<T> eq = EqualityComparer<T>.Default;
foreach(T i in Merge(arrays))
if(first || !eq.Equals(last,i))
{
yield return i;
last = i;
first = false;
}
}
public static IEnumerable<T> Merge<T>(this T[][] arrays)
{
var map = new Heap<ArrayCursor<T>>(arrays.Length, new ArrayComparer<T>(Comparer<T>.Default));
Action<ArrayCursor<T>> tryAdd = (a)=>
{
if(!a.Finished)
map.Add(a);
};
for(int i=0;i<arrays.Length;i++)
tryAdd(new ArrayCursor<T>{Array=arrays[i], Index=0});
while(map.Count>0)
{
ArrayCursor<T> lowest = map.Pop();
yield return lowest.Value;
lowest.Index++;
tryAdd(lowest);
}
}
}