I have some code that is really slow. I knew it would be, and now it is. Basically, I am reading files from a bunch of directories. The file names change but the data does not. To determine whether I have already read a file, I am hashing its bytes and comparing that to a list of hashes of already processed files. There are about 1000 files in each directory, and figuring out what's new in each directory takes a good minute or so (and then the processing starts). Here's the basic code:
/// <summary>
/// Extension helpers for fingerprinting a file by its content and for
/// rendering a byte sequence as a hexadecimal string.
/// </summary>
public static class ProgramExtensions
{
    // Buffer handed to FileStream; ComputeHash pulls data in small chunks, so a
    // larger stream buffer cuts down the number of reads issued to the OS.
    private const int HashReadBufferSize = 64 * 1024;

    /// <summary>
    /// Computes the SHA-256 digest of the file's content.
    /// </summary>
    /// <param name="file">The file to hash.</param>
    /// <returns>The 32-byte SHA-256 digest of the file's bytes.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="file"/> is null.</exception>
    public static byte[] ToSHA256Hash(this FileInfo file)
    {
        if (file == null)
        {
            throw new System.ArgumentNullException("file");
        }

        // Ask for read access only: FileStream(path, FileMode) requests read/write,
        // which fails on read-only files even though hashing never writes.
        // SequentialScan tells the OS the file is read once, front to back.
        using (FileStream fs = new FileStream(
            file.FullName,
            FileMode.Open,
            FileAccess.Read,
            FileShare.Read,
            HashReadBufferSize,
            FileOptions.SequentialScan))
        {
            // SHA256.Create() picks the platform implementation; SHA256Managed is
            // marked obsolete on current .NET versions.
            using (SHA256 hasher = SHA256.Create())
            {
                return hasher.ComputeHash(fs);
            }
        }
    }

    /// <summary>
    /// Formats the bytes as an upper-case hexadecimal string prefixed with "0x".
    /// </summary>
    /// <param name="p">The bytes to format; an empty array yields "0x".</param>
    /// <returns>"0x" followed by two hex digits per input byte.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="p"/> is null.</exception>
    public static string ToHexString(this byte[] p)
    {
        if (p == null)
        {
            throw new System.ArgumentNullException("p");
        }

        char[] c = new char[p.Length * 2 + 2];
        c[0] = '0';
        c[1] = 'x';
        for (int i = 0; i < p.Length; ++i)
        {
            // Two output characters per byte, placed after the "0x" prefix.
            int offset = 2 + i * 2;
            c[offset] = ToHexDigit(p[i] >> 4);
            c[offset + 1] = ToHexDigit(p[i] & 0xF);
        }
        return new string(c);
    }

    // Maps a value in 0..15 to '0'-'9' or 'A'-'F'.
    private static char ToHexDigit(int nibble)
    {
        // 0x30 is '0'; 0x37 + 10 is 'A'.
        return (char)(nibble > 9 ? nibble + 0x37 : nibble + 0x30);
    }
}
/// <summary>
/// Scans a directory and collects the files whose content has not been
/// processed before, judged by the SHA-256 hash of each file's bytes.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the list of not-yet-processed files in c:\temp.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var allFiles = new DirectoryInfo("c:\\temp").GetFiles("*.*");

        // A HashSet gives constant-time membership tests; List.Contains would
        // rescan every known hash for each file in the directory.
        HashSet<string> readFileHashes = new HashSet<string>(GetReadFileHashes());

        List<FileInfo> filesToRead = new List<FileInfo>();
        foreach (var file in allFiles)
        {
            string hash = file.ToSHA256Hash().ToHexString();

            // A file still needs reading only when its hash is NOT among the
            // already-processed ones (the earlier check had this inverted).
            if (!readFileHashes.Contains(hash))
            {
                filesToRead.Add(file);
            }
        }
        //read new files
    }
}
Is there any way I can speed this up?