Disclaimer This is premature optimization. For your example arrays, use the 3.5 extension methods. Until you know you have a performance problem in this region, you should use library code.
If you can sort the arrays, or they're sorted when you get to that point in the code, you can use the following methods.
These will pull one item from both, and produce the "lowest" item, then fetch a new item from the corresponding source, until both sources are exhausted. In the case where the current item fetched from the two sources are equal, it will produce the one from the first source, and skip them in both sources.
private static IEnumerable<T> Merge<T>(IEnumerable<T> source1,
IEnumerable<T> source2)
{
return Merge(source1, source2, Comparer<T>.Default);
}
private static IEnumerable<T> Merge<T>(IEnumerable<T> source1,
IEnumerable<T> source2, IComparer<T> comparer)
{
#region Parameter Validation
if (Object.ReferenceEquals(null, source1))
throw new ArgumentNullException("source1");
if (Object.ReferenceEquals(null, source2))
throw new ArgumentNullException("source2");
if (Object.ReferenceEquals(null, comparer))
throw new ArgumentNullException("comparer");
#endregion
using (IEnumerator<T>
enumerator1 = source1.GetEnumerator(),
enumerator2 = source2.GetEnumerator())
{
Boolean more1 = enumerator1.MoveNext();
Boolean more2 = enumerator2.MoveNext();
while (more1 && more2)
{
Int32 comparisonResult = comparer.Compare(
enumerator1.Current,
enumerator2.Current);
if (comparisonResult < 0)
{
// enumerator 1 has the "lowest" item
yield return enumerator1.Current;
more1 = enumerator1.MoveNext();
}
else if (comparisonResult > 0)
{
// enumerator 2 has the "lowest" item
yield return enumerator2.Current;
more2 = enumerator2.MoveNext();
}
else
{
// they're considered equivalent, only yield it once
yield return enumerator1.Current;
more1 = enumerator1.MoveNext();
more2 = enumerator2.MoveNext();
}
}
// Yield rest of values from non-exhausted source
while (more1)
{
yield return enumerator1.Current;
more1 = enumerator1.MoveNext();
}
while (more2)
{
yield return enumerator2.Current;
more2 = enumerator2.MoveNext();
}
}
}
Note that if one of the sources contains duplicates, you might see duplicates in the output. If you want to remove these duplicates in the already sorted lists, use the following method:
private static IEnumerable<T> CheapDistinct<T>(IEnumerable<T> source)
{
return CheapDistinct<T>(source, Comparer<T>.Default);
}
private static IEnumerable<T> CheapDistinct<T>(IEnumerable<T> source,
IComparer<T> comparer)
{
#region Parameter Validation
if (Object.ReferenceEquals(null, source))
throw new ArgumentNullException("source");
if (Object.ReferenceEquals(null, comparer))
throw new ArgumentNullException("comparer");
#endregion
using (IEnumerator<T> enumerator = source.GetEnumerator())
{
if (enumerator.MoveNext())
{
T item = enumerator.Current;
// scan until different item found, then produce
// the previous distinct item
while (enumerator.MoveNext())
{
if (comparer.Compare(item, enumerator.Current) != 0)
{
yield return item;
item = enumerator.Current;
}
}
// produce last item that is left over from above loop
yield return item;
}
}
}
Note that none of these will internally use a data structure to keep a copy of the data, so they will be cheap if the input is sorted. If you can't, or won't, guarantee that, you should use the 3.5 extension methods that you've already found.
Here's example code that calls the above methods:
String[] list_1 = { "apple", "orange", "apple", "banana" };
String[] list_2 = { "banana", "pear", "grape" };
Array.Sort(list_1);
Array.Sort(list_2);
IEnumerable<String> items = Merge(
CheapDistinct(list_1),
CheapDistinct(list_2));
foreach (String item in items)
Console.Out.WriteLine(item);