views:

37

answers:

0

I use BrB to share a data source among several worker processes in Ruby 1.9. The data source process is forked with Process.fork as follows:

# Make an unhandled exception in any thread abort the whole process.
Thread.abort_on_exception = true

# Child process that builds the data source and serves it through BrB.
fork do
  puts "Initializing data source process... (PID: #{Process.pid})"
  source = DataSource.new(files)

  BrB::Service.start_service(
    :object  => source,
    :verbose => false,
    :host    => host,
    :port    => port
  )

  # Wait on the EventMachine reactor thread so this process stays alive
  # for as long as the reactor is running.
  EM.reactor_thread.join
end

The workers are forked as follows:

# Fork 8 worker processes. Each one opens a BrB tunnel to the shared data
# source, runs threads_num worker threads until OutOfTargetsError is raised
# in each of them, and then shuts the tunnel and the reactor down.
8.times do
  fork do
    data = BrB::Tunnel.create(nil, "brb://#{host}:#{port}", :verbose => false)

    puts "Launching #{threads_num} worker threads... (PID: #{Process.pid})"

    threads = Array.new(threads_num) do
      Thread.new do
        loop do
          begin
            # NOTE(review): the instance is never used afterwards, so the work
            # presumably happens inside Worker#initialize - confirm.
            Worker.new(data, config)
          rescue OutOfTargetsError
            # Nothing left to process: let this thread finish.
            break
          rescue StandardError => e
            # Rescue StandardError rather than Exception so that SystemExit,
            # signals and interpreter-level errors are not swallowed and
            # retried forever; only ordinary failures are logged and retried.
            puts "An unexpected exception was caught: #{e.class} => #{e}"
            sleep 5
          end
        end
      end
    end

    # Wait for every worker thread before tearing the connection down.
    threads.each(&:join)

    data.stop_service
    EM.stop
  end
end

This works pretty much perfectly, but after around 10 minutes of running I get the following error:

bootstrap.rb:47:in `join': deadlock detected (fatal)
        from bootstrap.rb:47:in `block in '
        from bootstrap.rb:39:in `fork'
        from bootstrap.rb:39:in `'

This error doesn't tell me much about where the deadlock is actually happening; it only points me to the join on the EventMachine thread.

How do I trace back at which point the program locks up?