I have an array with a huge amounts of IDs I would like to select out from the DB.
The usual approach would be to do select blabla from xxx where yyy IN (ids) OPTION (RECOMPILE)
.
(The option recompile is needed, because SQL server is not intelligent enough to see that putting this query in its query cache is a huge waste of memory)
However, SQL Server is horrible at this type of query when the amount of IDs are high, the parser that it uses to simply too slow. Let me give an example:
SELECT * FROM table WHERE id IN (288525, 288528, 288529,<about 5000 ids>, 403043, 403044) OPTION (RECOMPILE)
Time to execute: ~1100 msec (This returns appx 200 rows in my example)
Versus:
SELECT * FROM table WHERE id BETWEEN 288525 AND 403044 OPTION (RECOMPILE)
Time to execute: ~80 msec (This returns appx 50000 rows in my example)
So even though I get 250 times more data back, it executes 14 times faster...
So I built this function to take my list of ids and build something that will return a reasonable compromise between the two (something that doesn't return 250 times as much data, yet still gives the benefit of parsing the query faster)
private const int MAX_NUMBER_OF_EXTRA_OBJECTS_TO_FETCH = 5;
public static string MassIdSelectionStringBuilder(
List<int> keys, ref int startindex, string colname)
{
const int maxlength = 63000;
if (keys.Count - startindex == 1)
{
string idstring = String.Format("{0} = {1}", colname, keys[startindex]);
startindex++;
return idstring;
}
StringBuilder sb = new StringBuilder(maxlength + 1000);
List<int> individualkeys = new List<int>(256);
int min = keys[startindex++];
int max = min;
sb.Append("(");
const string betweenAnd = "{0} BETWEEN {1} AND {2}\n";
for (; startindex < keys.Count && sb.Length + individualkeys.Count * 8 < maxlength; startindex++)
{
int key = keys[startindex];
if (key > max+MAX_NUMBER_OF_EXTRA_OBJECTS_TO_FETCH)
{
if (min == max)
individualkeys.Add(min);
else
{
if(sb.Length > 2)
sb.Append(" OR ");
sb.AppendFormat(betweenAnd, colname, min, max);
}
min = max = key;
}
else
{
max = key;
}
}
if (min == max)
individualkeys.Add(min);
else
{
if (sb.Length > 2)
sb.Append(" OR ");
sb.AppendFormat(betweenAnd, colname, min, max);
}
if (individualkeys.Count > 0)
{
if (sb.Length > 2)
sb.Append(" OR ");
string[] individualkeysstr = new string[individualkeys.Count];
for (int i = 0; i < individualkeys.Count; i++)
individualkeysstr[i] = individualkeys[i].ToString();
sb.AppendFormat("{0} IN ({1})", colname, String.Join(",",individualkeysstr));
}
sb.Append(")");
return sb.ToString();
}
It is then used like this:
List<int> keys; //Sort and make unique
...
for (int i = 0; i < keys.Count;)
{
string idstring = MassIdSelectionStringBuilder(keys, ref i, "id");
string sqlstring = string.Format("SELECT * FROM table WHERE {0} OPTION (RECOMPILE)", idstring);
However, my question is... Does anyone know of a better/faster/smarter way to do this?