I have a SQL data reader that reads 2 columns from a sql db table. once it has done its bit it then starts again selecting another 2 columns.
I would pull the whole lot in one go but that presents a whole other set of challenges.
My problem is that the table contains a large amount of data (some 3 million rows or so) which makes working with the entire set a bit of a problem.
I'm trying to validate the field values so i'm pulling the ID column then one of the other cols and running each value in the column through a validation pipeline where the results are stored in another database.
My problem is that when the reader hits the end of handlin one column I need to force it to immediately clean up every little block of ram used as this process uses about 700MB and it has about 200 columns to go through.
Without a full Garbage Collect I will definately run out of ram.
Anyone got any ideas how I can do this?
I'm using lots of small reusable objects, my thought was that I could just call GC.Collect() on the end of each read cycle and that would flush everything out, unfortunately that isn't happening for some reason.
Ok i hope this fits but here's the method in question ...
void AnalyseTable(string ObjectName, string TableName)
{
Console.WriteLine("Initialising analysis process for SF object \"" + ObjectName + "\"");
Console.WriteLine(" The data being used is in table [" + TableName + "]");
// get some helpful stuff from the databases
SQLcols = Target.GetData("SELECT Column_Name, Is_Nullable, Data_Type, Character_Maximum_Length FROM information_schema.columns WHERE table_name = '" + TableName + "'");
SFcols = SchemaSource.GetData("SELECT * FROM [" + ObjectName + "Fields]");
PickLists = SchemaSource.GetData("SELECT * FROM [" + ObjectName + "PickLists]");
// get the table definition
DataTable resultBatch = new DataTable();
resultBatch.TableName = TableName;
int counter = 0;
foreach (DataRow Column in SQLcols.Rows)
{
if (Column["Column_Name"].ToString().ToLower() != "id")
resultBatch.Columns.Add(new DataColumn(Column["Column_Name"].ToString(), typeof(bool)));
else
resultBatch.Columns.Add(new DataColumn("ID", typeof(string)));
}
// create the validation results table
//SchemaSource.CreateTable(resultBatch, "ValidationResults_");
// cache the id's from the source table in the validation table
//CacheIDColumn(TableName);
// validate the source table
// iterate through each sql column
foreach (DataRow Column in SQLcols.Rows)
{
// we do this here to save making this call a lot more later
string colName = Column["Column_Name"].ToString().ToLower();
// id col is only used to identify records not in validation
if (colName != "id")
{
// prepare to process
counter = 0;
resultBatch.Rows.Clear();
resultBatch.Columns.Clear();
resultBatch.Columns.Add(new DataColumn("ID", typeof(string)));
resultBatch.Columns.Add(new DataColumn(colName, typeof(bool)));
// identify matching SF col
foreach (DataRow SFDefinition in SFcols.Rows)
{
// case insensitive compare on the col name to ensure we have a match ...
if (SFDefinition["Name"].ToString().ToLower() == colName)
{
// select the id column and the column data to validate (current column data)
using (SqlCommand com = new SqlCommand("SELECT ID, [" + colName + "] FROM [" + TableName + "]", new SqlConnection(ConfigurationManager.ConnectionStrings["AnalysisTarget"].ConnectionString)))
{
com.Connection.Open();
SqlDataReader reader = com.ExecuteReader();
Console.WriteLine(" Validating column \"" + colName + "\"");
// foreach row in the given object dataset
while (reader.Read())
{
// create a new validation result row
DataRow result = resultBatch.NewRow();
bool hasFailed = false;
// validate it
object vResult = ValidateFieldValue(SFDefinition, reader[Column["Column_Name"].ToString()]);
// if we have the relevant col definition lets decide how to validate this value ...
result[colName] = vResult;
if (vResult is bool)
{
// if it's deemed to have failed validation mark it as such
if (!(bool)vResult)
hasFailed = true;
}
// no point in adding rows we can't trace
if (reader["id"] != DBNull.Value && reader["id"] != null)
{
// add the failed row to the result set
if (hasFailed)
{
result["id"] = reader["id"];
resultBatch.Rows.Add(result);
}
}
// submit to db in batches of 200
if (resultBatch.Rows.Count > 199)
{
counter += resultBatch.Rows.Count;
Console.Write(" Result batch completed,");
SchemaSource.Update(resultBatch, "ValidationResults_");
Console.WriteLine(" committed " + counter.ToString() + " fails to the database so far.");
Console.SetCursorPosition(0, Console.CursorTop-1);
resultBatch.Rows.Clear();
}
}
// get rid of these likely very heavy objects
reader.Close();
reader.Dispose();
com.Connection.Close();
com.Dispose();
// ensure .Net does a full cleanup because we will need the resources.
GC.Collect();
if (resultBatch.Rows.Count > 0)
{
counter += resultBatch.Rows.Count;
Console.WriteLine(" All batches for column complete,");
SchemaSource.Update(resultBatch, "ValidationResults_");
Console.WriteLine(" committed " + counter.ToString() + " fails to the database.");
}
}
}
}
}
Console.WriteLine(" Completed processing column \"" + colName + "\"");
Console.WriteLine("");
}
Console.WriteLine("Object processing complete.");
}