What is the most efficient way to enumerate every cell in every sheet in a workbook?
The method below seems to work reasonably for a workbook with ~130,000 cells. On my machine it took ~26 seconds to open the file and ~5 seconds to enumerate the cells . However I'm no Excel expert and wanted to validate this code snippet with the wider community.
DateTime timer = DateTime.Now;
Microsoft.Office.Interop.Excel.Application excelApplication = new Microsoft.Office.Interop.Excel.Application();
try
{
exampleFile = new FileInfo(Path.Combine(System.Environment.CurrentDirectory, "Large.xlsx"));
excelApplication.Workbooks.Open(exampleFile.FullName, false, false, missing, missing, missing, true, missing, missing, true, missing, missing, missing, missing, missing);
Console.WriteLine(string.Format("Took {0} seconds to open file", (DateTime.Now - timer).Seconds.ToString()));
timer = DateTime.Now;
foreach(Workbook workbook in excelApplication.Workbooks)
{
foreach(Worksheet sheet in workbook.Sheets)
{
int i = 0, iRowMax, iColMax;
string data = String.Empty;
Object[,] rangeData = (System.Object[,]) sheet.UsedRange.Cells.get_Value(missing);
if (rangeData != null)
{
iRowMax = rangeData.GetUpperBound(0);
iColMax = rangeData.GetUpperBound(1);
for (int iRow = 1; iRow < iRowMax; iRow++)
{
for(int iCol = 1; iCol < iColMax; iCol++)
{
data = rangeData[iRow, iCol] != null ? rangeData[iRow, iCol].ToString() : string.Empty;
if (i % 100 == 0)
{
Console.WriteLine(String.Format("Processed {0} cells.", i));
}
i++;
}
}
}
}
workbook.Close(false, missing, missing);
}
Console.WriteLine(string.Format("Took {0} seconds to parse file", (DateTime.Now - timer).Seconds.ToString()));
}
finally
{
excelApplication.Workbooks.Close();
excelApplication.Quit();
}
Edit:
Worth stating that I want to use PIA and interop in order to access properties of excel workbooks that are not exposed by API's that work directly with the Excel file.