Levenshtein distance? I think the first solution that you have already accepted has a flaw. It will tell you everything is out of order even if one little thing is inserted:
string[] list1 = { "a", "c", "b", "d", "j", "e", "f" };
string[] list2 = { "a", "d", "e", "f", "h", "j" };
that says that j, e, f are out of order, because the j was inserted.
This points out the problem you face. There are multiple solutions, even more than one optimal solution, to the problem of what is out of order. Is the J in order or the e and the f? Are they all out of order? There is something called the Levenshtein distance algorithm that finds a minimal number of inserts and delete operations necessary to start with set A and end up with set B. There are multiple best solutions, this just finds one of them.
This following algorithm correctly outputs that in list1, j was inserted and e, f are shifted but still in the correct order.
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using Math = System.Math;
namespace LevCompareLists {
class Program {
static void Main(string[] args) {
string[] list1 = { "a", "c", "b", "d", "j", "e", "f" };
string[] list2 = { "a", "d", "e", "f", "h", "j" };
int?[] aMap21 = Levenshtein(list2, list1);
int?[] aMap12 = Levenshtein(list1, list2);
}
public static int?[] Levenshtein(string[] Src, String[] Dst) {
// this finds a minimum difference solution of inserts and deletes that maps Src to Dst
// it returns the map from the perspective of Dst, i.e.:
// each element of the return array contains the Src index of the corresponging element in B
// a null value means the element in B was inserted, and never existed in A
//
// Given A = {"a", "c", "b", "d", "j", "e", "f"}
// B = {"a", "d", "e", "f", "h", "j"};
//
// Levenshtein(B, A):
// a c b d j e f <-- A
// 0 1 2 3 <-- aMap
// a d e f h j <-- B
//
// Levenshtein(A, B):
// a d e f h j <-- B
// 0 3 5 6 <-- aMap
// a c b d j e f <-- A
//
// see: http://en.wikipedia.org/wiki/Levenshtein_distance
int cSrc = Src.Length; //length of s
int cDst = Dst.Length; //length of t
if (cSrc == 0 || cDst == 0) return null;
//**** create the Levenshtein matrix
// it has at 1 extra element in each dimension to contain the edges
int[,] aLev = new int[cSrc + 1, cDst + 1]; // the matrix
int iSrc, iDst;
// Load the horizontal and vertical edges
for (iSrc = 0; iSrc <= cSrc; aLev[iSrc, 0] = iSrc++) ;
for (iDst = 0; iDst <= cDst; aLev[0, iDst] = iDst++) ;
// load the interior
for (iSrc = 1; iSrc <= cSrc; iSrc++)
for (iDst = 1; iDst <= cDst; iDst++)
aLev[iSrc, iDst] = Math.Min(Math.Min(aLev[iSrc - 1, iDst] + 1, aLev[iSrc, iDst - 1] + 1),
aLev[iSrc - 1, iDst - 1] + ((Dst[iDst - 1] == Src[iSrc - 1]) ? 0 : 2));
DumpLevMatrix(aLev, Src, Dst); // Debug
//**** create the return map, using the Levenshtein matrix
int?[] aMap = new int?[cDst]; // this is the return map
iSrc = cSrc; // start in lower right corner of the Levenshtein matrix
iDst = cDst; // start in lower right corner of the Levenshtein matrix
// work backwards to pick best solution
while ((iSrc >= 0) || (iDst >= 0)) {
if ((iSrc > 0) && (iDst > 0)) {
// enter here if iSrc and iDst are in the lev matrix and not on its edge
int nCur = aLev[iSrc, iDst];
int nIns = nCur - aLev[iSrc, iDst - 1]; // if move along B to find match, it was an insert
int nDel = nCur - aLev[iSrc - 1, iDst]; // if move along A to find match, it was a deletion
if (nIns == 1) // this char was NOT in A, but was inserted into B
iDst--; // Leave map of B[j] to nowher, scan to previous B (--j)
else if (nDel == 1) // this char was in A, but is missing in B
iSrc--; // Don't map any B, scan to previous A (--i)
else // Match
aMap[iDst-- - 1] = iSrc-- - 1; // After map B[j] to A[i], scan to prev A,B (--i, --j)
} else {
if (iDst > 0) // remaining chars are inserts, Leave map of B[j] to nowher, scan to previous B (--j)
iDst--;
else if (iSrc > 0) // Delete to the end, deletes do nothing
iSrc--;
else
break;
}
}
DumpMap(aMap, Dst); // Debug
return aMap;
}
// just for debugging
static void DumpLevMatrix(int[,] aLev, string[] Src, string[] Dst) {
StringBuilder sb = new StringBuilder();
int cSrc = Src.Length;
int cDst = Dst.Length;
int iSrc, iDst;
sb.Length = 6;
for (iDst = 0; iDst < cDst; ++iDst)
sb.AppendFormat("{0,-3}", Dst[iDst]);
Console.WriteLine(sb.ToString());
for (iSrc = 0; iSrc <= cSrc; ++iSrc) {
if (iSrc == 0)
sb.Length = 3;
else {
sb.Length = 0;
sb.AppendFormat("{0,-3}", Src[iSrc - 1]);
}
for (iDst = 0; iDst <= cDst; ++iDst)
sb.AppendFormat("{0:00}", aLev[iSrc, iDst]).Append(" ");
Console.WriteLine(sb.ToString());
}
}
// just for debugging
static void DumpMap(int?[] aMap, string[] Dst) {
StringBuilder sb = new StringBuilder();
for (int iMap = 0; iMap < aMap.Length; ++iMap)
sb.AppendFormat("{0,-3}", Dst[iMap]); // dst and map are same size
Console.WriteLine(sb.ToString());
sb.Length = 0;
for (int iMap = 0; iMap < aMap.Length; ++iMap)
if (aMap[iMap] == null)
sb.Append(" ");
else
sb.AppendFormat("{0:00}", aMap[iMap]).Append(" ");
Console.WriteLine(sb.ToString());
}
}
}