views:

95

answers:

4

I have simple metohds to export DataTable to xls using string. Number of columns is 5 - 30, and number or rows might be from 1 to 1000. Sometimes there is problem with performance, and I please for advice what can I change in my code. I'm using .net 4.0

public string FormatCell(string columnName, object value)
        {
        StringBuilder builder = new StringBuilder();
        string formattedValue = string.Empty;
        string type = "String";
        string style = "s21";

        if (!(value is DBNull) && columnName.Contains("GIS"))
            formattedValue = Convert.ToDouble(value).ToString("##.00000000°");
        else if (value is DateTime)
        {
            style = "s22";
            type = "DateTime";
            DateTime date = (DateTime)value;
            formattedValue = date.ToString("yyyy-MM-ddTHH:mm:ss.fff");
        }
        else if (value is double || value is float || value is decimal)
        {
            formattedValue = Convert.ToDecimal(value).ToString("#.00").Replace(',', '.');
            type = "Number";
        }
        else if (value is int)
        {
            formattedValue = value.ToString();
            type = "Number";
        }
        else
            formattedValue = value.ToString();

        builder.Append(string.Format("<Cell ss:StyleID=\"{0}\"><Data ss:Type=\"{1}\">", style, type));

        builder.Append(formattedValue);
        builder.AppendLine("</Data></Cell>");

        return builder.ToString();
    }

    public string ConvertToXls(DataTable table)
    {
        StringBuilder builder = new StringBuilder();

        int rows = table.Rows.Count + 1;
        int cols = table.Columns.Count;

        builder.AppendLine("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
        builder.AppendLine("<?mso-application progid=\"Excel.Sheet\"?>");
        builder.AppendLine("<Workbook xmlns=\"urn:schemas-microsoft-com:office:spreadsheet\"");
        builder.AppendLine(" xmlns:o=\"urn:schemas-microsoft-com:office:office\"");
        builder.AppendLine(" xmlns:x=\"urn:schemas-microsoft-com:office:excel\"");
        builder.AppendLine(" xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\"");
        builder.AppendLine(" xmlns:html=\"http://www.w3.org/TR/REC-html40/\"&gt;");
        builder.AppendLine(" <DocumentProperties xmlns=\"urn:schemas-microsoft-com:office:office\">;");
        builder.AppendLine("  <Author>Author</Author>");
        builder.AppendLine(string.Format("  <Created>{0}T{1}Z</Created>", DateTime.Now.ToString("yyyy-mm-dd"), DateTime.Now.ToString("HH:MM:SS")));
        builder.AppendLine("  <Company>Company</Company>");
        builder.AppendLine("  <Version>1.0</Version>");
        builder.AppendLine(" </DocumentProperties>");
        builder.AppendLine(" <ExcelWorkbook xmlns=\"urn:schemas-microsoft-com:office:excel\">");
        builder.AppendLine("  <WindowHeight>8955</WindowHeight>");
        builder.AppendLine("  <WindowWidth>11355</WindowWidth>");
        builder.AppendLine("  <WindowTopX>480</WindowTopX>");
        builder.AppendLine("  <WindowTopY>15</WindowTopY>");
        builder.AppendLine("  <ProtectStructure>False</ProtectStructure>");
        builder.AppendLine("  <ProtectWindows>False</ProtectWindows>");
        builder.AppendLine(" </ExcelWorkbook>");
        builder.AppendLine(" <Styles>");
        builder.AppendLine("  <Style ss:ID=\"Default\" ss:Name=\"Normal\">");
        builder.AppendLine("   <Alignment ss:Vertical=\"Bottom\"/>");
        builder.AppendLine("   <Borders/>");
        builder.AppendLine("   <Font/>");
        builder.AppendLine("   <Interior/>");
        builder.AppendLine("   <Protection/>");
        builder.AppendLine("  </Style>");
        builder.AppendLine("  <Style ss:ID=\"s21\">");
        builder.AppendLine("   <Alignment ss:Vertical=\"Bottom\" ss:WrapText=\"1\"/>");
        builder.AppendLine("  </Style>");
        builder.AppendLine("  <Style ss:ID=\"s22\">");
        builder.AppendLine("    <NumberFormat ss:Format=\"Short Date\"/>");
        builder.AppendLine("  </Style>");
        builder.AppendLine(" </Styles>");
        builder.AppendLine(" <Worksheet ss:Name=\"Export\">");
        builder.AppendLine(string.Format("  <Table ss:ExpandedColumnCount=\"{0}\" ss:ExpandedRowCount=\"{1}\" x:FullColumns=\"1\"", cols.ToString(), rows.ToString()));
        builder.AppendLine("   x:FullRows=\"1\">");

        //generate title
        builder.AppendLine("<Row>");
        foreach (DataColumn eachColumn in table.Columns)  // you can write a half columns of table and put the remaining columns in sheet2
        {
            if (eachColumn.ColumnName != "ID")
            {
                builder.Append("<Cell ss:StyleID=\"s21\"><Data ss:Type=\"String\">");
                builder.Append(eachColumn.ColumnName.ToString());
                builder.AppendLine("</Data></Cell>");
            }
        }
        builder.AppendLine("</Row>");

        //generate data
        foreach (DataRow eachRow in table.Rows)
        {
            builder.AppendLine("<Row>");
            foreach (DataColumn eachColumn in table.Columns)
            {
                if (eachColumn.ColumnName != "ID")
                {
                    builder.AppendLine(FormatCell(eachColumn.ColumnName, eachRow[eachColumn]));
                }
            }
            builder.AppendLine("</Row>");
        }
        builder.AppendLine("  </Table>");
        builder.AppendLine("  <WorksheetOptions xmlns=\"urn:schemas-microsoft-com:office:excel\">");
        builder.AppendLine("   <Selected/>");
        builder.AppendLine("   <Panes>");
        builder.AppendLine("    <Pane>");
        builder.AppendLine("     <Number>3</Number>");
        builder.AppendLine("     <ActiveRow>1</ActiveRow>");
        builder.AppendLine("    </Pane>");
        builder.AppendLine("   </Panes>");
        builder.AppendLine("   <ProtectObjects>False</ProtectObjects>");
        builder.AppendLine("   <ProtectScenarios>False</ProtectScenarios>");
        builder.AppendLine("  </WorksheetOptions>");
        builder.AppendLine(" </Worksheet>");
        builder.AppendLine(" <Worksheet ss:Name=\"Sheet2\">");
        builder.AppendLine("  <WorksheetOptions xmlns=\"urn:schemas-microsoft-com:office:excel\">");
        builder.AppendLine("   <ProtectObjects>False</ProtectObjects>");
        builder.AppendLine("   <ProtectScenarios>False</ProtectScenarios>");
        builder.AppendLine("  </WorksheetOptions>");
        builder.AppendLine(" </Worksheet>");
        builder.AppendLine(" <Worksheet ss:Name=\"Sheet3\">");
        builder.AppendLine("  <WorksheetOptions xmlns=\"urn:schemas-microsoft-com:office:excel\">");
        builder.AppendLine("   <ProtectObjects>False</ProtectObjects>");
        builder.AppendLine("   <ProtectScenarios>False</ProtectScenarios>");
        builder.AppendLine("  </WorksheetOptions>");
        builder.AppendLine(" </Worksheet>");
        builder.AppendLine("</Workbook>");

        return builder.ToString();
    }

using this:

string xlsData= ConvertToXls(someTable)


System.CodeDom.Compiler.TempFileCollection fileCollection = new System.CodeDom.Compiler.TempFileCollection();

                    string tempFileName = fileCollection.AddExtension("xls", true);

                    if (File.Exists(tempFileName))
                        File.Delete(tempFileName);

                    using (StreamWriter writer = new StreamWriter(tempFileName, false, Encoding.UTF8))
                        writer.Write(xlsData);
A: 

Well, you are just creating a bigger and bigger string in memory...so that would get worse and worse as the size goes up.

Is there any reason you aren't streaming this out to a file as you go, instead of building a GIANT string, and then serializing that out to a file?

Edit after you added your details:

Instead of having ConvertToXLS return a string, pass that streamwriter TO your convertToXLS method.

public void ConvertToXLS( DataTable table, StreamWriter stream )
{
    ...
}

inside of ConverToXLS, get rid of that StringBuilder, and replace all the calls of builder.AppendLine( x ) to

stream.WriteLine(x); 

That way as you are going, you're writing to the stream instead of creating a giant string.

John Gardner
Streaming ? What you mean? Could you give some example or link? I added code how I using this method.
@user278618: Streaming is when instead of sending data in one large piece it is sent continuously in small pieces. Like having a stream of water to your house through a pipe instead of a weekly delivery of a large water tank.
Zan Lynx
+1  A: 

You should profile your code with something like dotTrace to see where the time is going. At least put in timers to see how long each part is taking. Optimising without knowing where the bottleneck is is likely to be a waste of time. EG:

   DateTime startTime = DateTime.Now;
   Debug.WriteLine("Start : " + startTime);

   //some code

   Debug.WriteLine("End: " + DateTime.Now);
   Debug.WriteLine("Elapsed : " + (DateTime.Now - startTime));

I think John above is correct though. Use a Stream. eg.

StreamWriter streamWriter = System.IO.File.CreateText("c:\\mynewfile.xls");

streamWriter.AutoFlush = false;

//lots of writes

streamWriter.Flush();
streamWriter.Close();

You should test with autoflush false and true. You may also want to try a memory stream.

StreamWriter streamWriter = new StreamWriter(new MemoryStream());
Alistair
+1  A: 
jamietre
A: 

Instead of writing the lines out twice, once in memory and then writing to disk, try to get it down to one write operation. Straight to disk.

I have no idea what the performance comparison is like between the xml objects in .net and the stringbuilder but if I knew I was writing out Xml, I would tend to go for the xml object solutions, xmlwriter xlinq etc. The comfort in knowing that the data your producing is xml compliant on time every time is very reassuring.

Other posts on SS have stated that they think its faster using XmlTextWriter than StringBuilder.

http://stackoverflow.com/questions/2478427/stringbuilder-vs-xmltextwriter.

The answers about changing buffer sizes and delaying writes will work, but can be very hit and miss, yes your operations get faster if you do it all in memory, but then your memory footprint may get very large, so that the OS might do some disk swapping which affects the entire machine.(depending what you got running on your machine). Find the happy compromise and then stream the data at the write speed that your production system is happy with.

WeNeedAnswers