I have a question concerning Python multiprocessing. I am trying to take a dataset, break into chunks, and pass those chunks to concurrently running processes. I need to transform large tables of data using simple calculations (eg. electrical resistance -> temperature for a thermistor).
The code listed below almost works as desired, but it doesn't seem to be spawning any new processes (or if so only one at a time). I am new to Python, so there is probably quite a simple solution to this problem.
Thanks in advance!
from multiprocessing import Process class Worker(Process): # example data transform def process(self, x): return (x * 2) / 3 def __init__(self, list): self.data = list self.result = map(self.process, self.data) super(Worker, self).__init__() if __name__ == '__main__': start = datetime.datetime.now() dataset = range(10000) # null dataset processes = 3 for i in range(processes): chunk = int(math.floor(len(dataset) / float(processes))) if i + 1 == processes: remainder = len(dataset) % processes else: remainder = 0 tmp = dataset[i * chunk : (i + 1) * chunk + remainder] exec('worker'+str(i)+' = Worker(tmp)') exec('worker'+str(i)+'.start()') for i in range(processes): exec('worker'+str(i)+'.join()') # just a placeholder to make sure the initial values of the set are as expected exec('print worker'+str(i)+'.result[0]')