I am starting to write my first parallel applications. This partitioner will enumerate over a IDataReader
pulling chunkSize
records at a time from the data-source.
TLDR; version
private object _Lock = new object();
public IEnumerator GetEnumerator()
{
var infoSource = myInforSource.GetEnumerator();
//Will this cause a deadlock if two threads
lock (_Lock) //use the enumator at the same time?
{
while (infoSource.MoveNext())
{
yield return infoSource.Current;
}
}
}
full code
protected class DataSourcePartitioner<object[]> : System.Collections.Concurrent.Partitioner<object[]>
{
private readonly System.Data.IDataReader _Input;
private readonly int _ChunkSize;
public DataSourcePartitioner(System.Data.IDataReader input, int chunkSize = 10000)
: base()
{
if (chunkSize < 1)
throw new ArgumentOutOfRangeException("chunkSize");
_Input = input;
_ChunkSize = chunkSize;
}
public override bool SupportsDynamicPartitions { get { return true; } }
public override IList<IEnumerator<object[]>> GetPartitions(int partitionCount)
{
var dynamicPartitions = GetDynamicPartitions();
var partitions =
new IEnumerator<object[]>[partitionCount];
for (int i = 0; i < partitionCount; i++)
{
partitions[i] = dynamicPartitions.GetEnumerator();
}
return partitions;
}
public override IEnumerable<object[]> GetDynamicPartitions()
{
return new ListDynamicPartitions(_Input, _ChunkSize);
}
private class ListDynamicPartitions : IEnumerable<object[]>
{
private System.Data.IDataReader _Input;
int _ChunkSize;
private object _ChunkLock = new object();
public ListDynamicPartitions(System.Data.IDataReader input, int chunkSize)
{
_Input = input;
_ChunkSize = chunkSize;
}
public IEnumerator<object[]> GetEnumerator()
{
while (true)
{
List<object[]> chunk = new List<object[]>(_ChunkSize);
lock(_Input)
{
for (int i = 0; i < _ChunkSize; ++i)
{
if (!_Input.Read())
break;
var values = new object[_Input.FieldCount];
_Input.GetValues(values);
chunk.Add(values);
}
if (chunk.Count == 0)
yield break;
}
var chunkEnumerator = chunk.GetEnumerator();
lock(_ChunkLock) //Will this cause a deadlock?
{
while (chunkEnumerator.MoveNext())
{
yield return chunkEnumerator.Current;
}
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return ((IEnumerable<object[]>)this).GetEnumerator();
}
}
}
I wanted IEnumerable
object it passed back to be thread safe (the MSDN example was so I am assuming PLINQ and TPL could need it) will the lock on _ChunkLock
near the bottom help provide thread safety or will it cause a deadlock? From the documentation I could not tell if the lock would be released on the yeld return
.
Also if there is built in functionality to .net that will do what I am trying to do I would much rather use that. And if you find any other problems with the code I would appreciate it.