Ok, looks like might be not possible to optimize within the published API.
Test code:
#extconf.rb
require 'mkmf'
dir_config("hello")
create_makefile("hello")
// hello.c
#include "ruby.h"
static VALUE rb_mHello;
static VALUE rb_cMyCalc;
static void calc_mark(void *f) { }
static void calc_free(void *f) { }
static VALUE calc_alloc(VALUE klass) { return Data_Wrap_Struct(klass, calc_mark, calc_free, NULL); }
static VALUE calc_init(VALUE obj) { return Qnil; }
static VALUE calc_merge(VALUE obj, VALUE h1, VALUE h2) {
return rb_funcall(h1, rb_intern("merge"), 1, h2);
}
static VALUE
calc_merge2(VALUE obj, VALUE h1, VALUE h2)
{
VALUE h3 = rb_hash_new();
VALUE keys;
VALUE akey;
keys = rb_funcall(h1, rb_intern("keys"), 0);
while (akey = rb_each(keys)) {
rb_hash_aset(h3, akey, rb_hash_aref(h1, akey));
}
keys = rb_funcall(h2, rb_intern("keys"), 0);
while (akey = rb_each(keys)) {
rb_hash_aset(h3, akey, rb_hash_aref(h2, akey));
}
return h3;
}
static VALUE
calc_merge3(VALUE obj, VALUE h1, VALUE h2)
{
VALUE keys;
VALUE akey;
keys = rb_funcall(h1, rb_intern("keys"), 0);
while (akey = rb_each(keys)) {
rb_hash_aset(h2, akey, rb_hash_aref(h1, akey));
}
return h2;
}
void
Init_hello()
{
rb_mHello = rb_define_module("Hello");
rb_cMyCalc = rb_define_class_under(rb_mHello, "Calculator", rb_cObject);
rb_define_alloc_func(rb_cMyCalc, calc_alloc);
rb_define_method(rb_cMyCalc, "initialize", calc_init, 0);
rb_define_method(rb_cMyCalc, "merge", calc_merge, 2);
rb_define_method(rb_cMyCalc, "merge2", calc_merge, 2);
rb_define_method(rb_cMyCalc, "merge3", calc_merge, 2);
}
# test.rb
require "hello"
h1 = Hash.new()
h2 = Hash.new()
1.upto(100000) { |x| h1[x] = x+1; }
1.upto(100000) { |x| h2["#{x}-12"] = x+1; }
c = Hello::Calculator.new()
puts c.merge(h1, h2).keys.length if ARGV[0] == "1"
puts c.merge2(h1, h2).keys.length if ARGV[0] == "2"
puts c.merge3(h1, h2).keys.length if ARGV[0] == "3"
Now the test results:
$ time ruby test.rb
real 0m1.021s
user 0m0.940s
sys 0m0.080s
$ time ruby test.rb 1
200000
real 0m1.224s
user 0m1.148s
sys 0m0.076s
$ time ruby test.rb 2
200000
real 0m1.219s
user 0m1.132s
sys 0m0.084s
$ time ruby test.rb 3
200000
real 0m1.220s
user 0m1.128s
sys 0m0.092s
So it looks like we might shave off at maximum ~0.004s on a 0.2s operation.
Given that there's probably not that much besides setting the values, there might not be that much space for further optimizations. Maybe try to hack the ruby source itself - but at that point you no longer really develop "extension" but rather change the language, so it probably won't work.
If the join of hashes is something that you need to do many times in the C part - then probably using the internal data structures and only exporting them into Ruby hash in the final pass would be the only way of optimizing things.
p.s. The initial skeleton for the code borrowed from this excellent tutorial