Here's the harness I'm using at the moment. I consistently get the code in the question (Method 1) to perform significantly faster. Currently Mark Peters' answer is the fastest normally, but Carl's is faster with the -server
flag set. Also, mikera's gets considerably more competitive in server mode (though still slower than Carl/Mark's).
import java.util.Random;
public abstract class Harness {
public static void main(String[] args) {
while (true) {
long[] randomData = new long[4096];
Random r = new Random();
for (int i = 0; i < randomData.length; i++) {
randomData[i] = r.nextLong();
}
long[] buffer = new long[64];
Harness[] versions = new Harness[] {
new Control(randomData, buffer),
new Carl(randomData, buffer),
new MarkPeters(randomData, buffer),
new MarkPetersServer64Bit(randomData, buffer),
// new Rsp1(randomData, buffer),
new Rsp2(randomData, buffer),
new Mikera(randomData, buffer)
};
for (Harness v : versions) {
v.doTest();
}
}
}
private final long[] buffer;
private final long[] randomData;
protected Harness(long[] randomData, long[] buffer) {
this.randomData = randomData;
this.buffer = buffer;
}
public void doTest() {
long start = System.nanoTime();
long count = 0;
for (int times = 0; times < 1000; times++) {
for (int i = 0; i < randomData.length; i++) {
count = decompose(buffer, randomData[i]);
}
}
long end = System.nanoTime();
//verify decomposition of last item
long l = 0;
for ( int i = 0; i < count; i++ ) {
l |= buffer[i];
}
System.out.println(getClass().getSimpleName() + " took " + (end - start)
/ 1000000.0 + " ms - last base: " + l);
}
protected abstract int decompose(long[] buffer, long base);
private static class Control extends Harness {
protected Control(long[] randomData, long[] buffer) {
super(randomData, buffer);
}
@Override
protected int decompose(long[] buffer, long base) {
return 0;
}
}
private static class Carl extends Harness {
protected Carl(long[] randomData, long[] buffer) {
super(randomData, buffer);
}
@Override
protected int decompose(long[] buffer, long base) {
final int count = Long.bitCount(base);
for (int i = 0; i < count; i++) {
base ^= (buffer[i] = Long.lowestOneBit(base));
}
return count;
}
}
private static class Mikera extends Harness {
protected Mikera(long[] randomData, long[] buffer) {
super(randomData, buffer);
}
@Override
protected int decompose(long[] buffer, long base) {
int count = 0;
while (base != 0) {
long mask = base & (-base);
base &= ~mask;
buffer[count++] = mask;
}
return count;
}
}
private static class Rsp1 extends Harness {
protected Rsp1(long[] randomData, long[] buffer) {
super(randomData, buffer);
}
@Override
protected int decompose(long[] buffer, long base) {
int count = 0;
if (0 != (base & 1)) {
buffer[count++] = 1;
}
base >>>= 1;
for (long bit = 1; 0 < bit && bit <= base; ) {
if (0 < (base & bit)) {
buffer[count++] = (bit <<= 1);
}
}
return count;
}
}
private static class Rsp2 extends Harness {
protected Rsp2(long[] randomData, long[] buffer) {
super(randomData, buffer);
}
@Override
protected int decompose(long[] buffer, long base) {
int count = 0;
for (long bit = 1; 0 != base; bit <<= 1, base >>>= 1) {
if (0 < (base & 1)) {
buffer[count++] = bit;
}
}
return count;
}
}
private static class MarkPeters extends Harness {
protected MarkPeters(long[] randomData, long[] buffer) {
super(randomData, buffer);
}
@Override
protected int decompose(long[] buffer, long base) {
int count = Long.bitCount(base);
for (int i = 0; i < count; i++) {
base -= (buffer[i] = Long.lowestOneBit(base));
}
return count;
}
}
private static class MarkPetersServer64Bit extends Harness {
protected MarkPetersServer64Bit(long[] randomData, long[] buffer) {
super(randomData, buffer);
}
@Override
protected int decompose(long[] buffer, long base) {
int count = 0;
while ( 0 != (base ^= (buffer[count++] = Long.lowestOneBit(base))));
return count;
}
}
}
Sample output
Which method is best depends on the situation.
Non-server, 32-bit JVM
Control took 41.175272 ms - last base: 0
Carl took 691.966919 ms - last base: 5852835112840111303
MarkPeters took 642.230253 ms - last base: 5852835112840111303
MarkPetersServer64Bit took 742.594626 ms - last base: 5852835112840111303
Rsp2 took 3886.203787 ms - last base: 5852835112840111303
Mikera took 1044.451494 ms - last base: 5852835112840111303
Winner: MarkPeters
Server, 32-bit JVM
Control took 2.354383 ms - last base: 0
Carl took 508.687401 ms - last base: 338317437500027646
MarkPeters took 521.831297 ms - last base: 338317437500027646
MarkPetersServer64Bit took 727.052206 ms - last base: 338317437500027646
Rsp2 took 3811.75662 ms - last base: 338317437500027646
Mikera took 665.252599 ms - last base: 338317437500027646
Winner: Carl
Server (implicit or explicit), 64-bit JVM
Control took 0.007186 ms - last base: 0
Carl took 543.701859 ms - last base: -8898262206218882664
MarkPeters took 439.706079 ms - last base: -8898262206218882664
MarkPetersServer64Bit took 391.831055 ms - last base: -8898262206218882664
Rsp2 took 1861.40449 ms - last base: -8898262206218882664
Mikera took 435.964319 ms - last base: -8898262206218882664
Winner: MarkPetersServer64Bit
Note: there is no comparable 64-bit JVM that can run in non-server mode to my knowledge. See here:
Are both -client and -server VM modes available in 64-bit Java?
Currently only the Java HotSpot Server VM supports 64-bit operation, and the -server option is implicit with the use of -d64. This is subject to change in a future release.