I tried out three different methods of comparing two identical 3,8 gb files with buffer sizes between 8 kb and 1 MB.
the first first method used just two buffered input streams
the second approach uses a threadpool that reads in two different threads and compares in a third one. this got slightly higher throughput at the expense of a high cpu utilisation. the managing of the threadpool takes a lot of overhead with those short-running tasks.
the third approach uses nio, as posted by laginimaineb
as you can see, the general approach does not differ much. more important is the correct buffer size.
what is strange that i read 1 byte less using threads. i could not spot the error tough.
comparing just with two streams
I was equal, even after 3684070360 bytes and reading for 704813 ms (4,98MB/sec * 2) with a buffer size of 8 kB
I was equal, even after 3684070360 bytes and reading for 578563 ms (6,07MB/sec * 2) with a buffer size of 16 kB
I was equal, even after 3684070360 bytes and reading for 515422 ms (6,82MB/sec * 2) with a buffer size of 32 kB
I was equal, even after 3684070360 bytes and reading for 534532 ms (6,57MB/sec * 2) with a buffer size of 64 kB
I was equal, even after 3684070360 bytes and reading for 422953 ms (8,31MB/sec * 2) with a buffer size of 128 kB
I was equal, even after 3684070360 bytes and reading for 793359 ms (4,43MB/sec * 2) with a buffer size of 256 kB
I was equal, even after 3684070360 bytes and reading for 746344 ms (4,71MB/sec * 2) with a buffer size of 512 kB
I was equal, even after 3684070360 bytes and reading for 669969 ms (5,24MB/sec * 2) with a buffer size of 1024 kB
comparing with threads
I was equal, even after 3684070359 bytes and reading for 602391 ms (5,83MB/sec * 2) with a buffer size of 8 kB
I was equal, even after 3684070359 bytes and reading for 523156 ms (6,72MB/sec * 2) with a buffer size of 16 kB
I was equal, even after 3684070359 bytes and reading for 527547 ms (6,66MB/sec * 2) with a buffer size of 32 kB
I was equal, even after 3684070359 bytes and reading for 276750 ms (12,69MB/sec * 2) with a buffer size of 64 kB
I was equal, even after 3684070359 bytes and reading for 493172 ms (7,12MB/sec * 2) with a buffer size of 128 kB
I was equal, even after 3684070359 bytes and reading for 696781 ms (5,04MB/sec * 2) with a buffer size of 256 kB
I was equal, even after 3684070359 bytes and reading for 727953 ms (4,83MB/sec * 2) with a buffer size of 512 kB
I was equal, even after 3684070359 bytes and reading for 741000 ms (4,74MB/sec * 2) with a buffer size of 1024 kB
comparing with nio
I was equal, even after 3684070360 bytes and reading for 661313 ms (5,31MB/sec * 2) with a buffer size of 8 kB
I was equal, even after 3684070360 bytes and reading for 656156 ms (5,35MB/sec * 2) with a buffer size of 16 kB
I was equal, even after 3684070360 bytes and reading for 491781 ms (7,14MB/sec * 2) with a buffer size of 32 kB
I was equal, even after 3684070360 bytes and reading for 317360 ms (11,07MB/sec * 2) with a buffer size of 64 kB
I was equal, even after 3684070360 bytes and reading for 643078 ms (5,46MB/sec * 2) with a buffer size of 128 kB
I was equal, even after 3684070360 bytes and reading for 865016 ms (4,06MB/sec * 2) with a buffer size of 256 kB
I was equal, even after 3684070360 bytes and reading for 716796 ms (4,90MB/sec * 2) with a buffer size of 512 kB
I was equal, even after 3684070360 bytes and reading for 652016 ms (5,39MB/sec * 2) with a buffer size of 1024 kB
the code used:
import junit.framework.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.concurrent.*;
public class FileCompare {
private static final int MIN_BUFFER_SIZE = 1024 * 8;
private static final int MAX_BUFFER_SIZE = 1024 * 1024;
private String fileName1;
private String fileName2;
private long start;
private long totalbytes;
@Before
public void createInputStream() {
fileName1 = "bigFile.1";
fileName2 = "bigFile.2";
}
@Test
public void compareTwoFiles() throws IOException {
System.out.println("comparing just with two streams");
int currentBufferSize = MIN_BUFFER_SIZE;
while (currentBufferSize <= MAX_BUFFER_SIZE) {
compareWithBufferSize(currentBufferSize);
currentBufferSize *= 2;
}
}
@Test
public void compareTwoFilesFutures()
throws IOException, ExecutionException, InterruptedException {
System.out.println("comparing with threads");
int myBufferSize = MIN_BUFFER_SIZE;
while (myBufferSize <= MAX_BUFFER_SIZE) {
start = System.currentTimeMillis();
totalbytes = 0;
compareWithBufferSizeFutures(myBufferSize);
myBufferSize *= 2;
}
}
@Test
public void compareTwoFilesNio() throws IOException {
System.out.println("comparing with nio");
int myBufferSize = MIN_BUFFER_SIZE;
while (myBufferSize <= MAX_BUFFER_SIZE) {
start = System.currentTimeMillis();
totalbytes = 0;
boolean wasEqual = isEqualsNio(myBufferSize);
if (wasEqual) {
printAfterEquals(myBufferSize);
} else {
Assert.fail("files were not equal");
}
myBufferSize *= 2;
}
}
private void compareWithBufferSize(int myBufferSize) throws IOException {
final BufferedInputStream inputStream1 =
new BufferedInputStream(
new FileInputStream(new File(fileName1)),
myBufferSize);
byte[] buff1 = new byte[myBufferSize];
final BufferedInputStream inputStream2 =
new BufferedInputStream(
new FileInputStream(new File(fileName2)),
myBufferSize);
byte[] buff2 = new byte[myBufferSize];
int read1;
start = System.currentTimeMillis();
totalbytes = 0;
while ((read1 = inputStream1.read(buff1)) != -1) {
totalbytes += read1;
int read2 = inputStream2.read(buff2);
if (read1 != read2) {
break;
}
if (!Arrays.equals(buff1, buff2)) {
break;
}
}
if (read1 == -1) {
printAfterEquals(myBufferSize);
} else {
Assert.fail("files were not equal");
}
inputStream1.close();
inputStream2.close();
}
private void compareWithBufferSizeFutures(int myBufferSize)
throws ExecutionException, InterruptedException, IOException {
final BufferedInputStream inputStream1 =
new BufferedInputStream(
new FileInputStream(
new File(fileName1)),
myBufferSize);
final BufferedInputStream inputStream2 =
new BufferedInputStream(
new FileInputStream(
new File(fileName2)),
myBufferSize);
final boolean wasEqual = isEqualsParallel(myBufferSize, inputStream1, inputStream2);
if (wasEqual) {
printAfterEquals(myBufferSize);
} else {
Assert.fail("files were not equal");
}
inputStream1.close();
inputStream2.close();
}
private boolean isEqualsParallel(int myBufferSize
, final BufferedInputStream inputStream1
, final BufferedInputStream inputStream2)
throws InterruptedException, ExecutionException {
final byte[] buff1Even = new byte[myBufferSize];
final byte[] buff1Odd = new byte[myBufferSize];
final byte[] buff2Even = new byte[myBufferSize];
final byte[] buff2Odd = new byte[myBufferSize];
final Callable<Integer> read1Even = new Callable<Integer>() {
public Integer call() throws Exception {
return inputStream1.read(buff1Even);
}
};
final Callable<Integer> read2Even = new Callable<Integer>() {
public Integer call() throws Exception {
return inputStream2.read(buff2Even);
}
};
final Callable<Integer> read1Odd = new Callable<Integer>() {
public Integer call() throws Exception {
return inputStream1.read(buff1Odd);
}
};
final Callable<Integer> read2Odd = new Callable<Integer>() {
public Integer call() throws Exception {
return inputStream2.read(buff2Odd);
}
};
final Callable<Boolean> oddEqualsArray = new Callable<Boolean>() {
public Boolean call() throws Exception {
return Arrays.equals(buff1Odd, buff2Odd);
}
};
final Callable<Boolean> evenEqualsArray = new Callable<Boolean>() {
public Boolean call() throws Exception {
return Arrays.equals(buff1Even, buff2Even);
}
};
ExecutorService executor = Executors.newCachedThreadPool();
boolean isEven = true;
Future<Integer> read1 = null;
Future<Integer> read2 = null;
Future<Boolean> isEqual = null;
int lastSize = 0;
while (true) {
if (isEqual != null) {
if (!isEqual.get()) {
return false;
} else if (lastSize == -1) {
return true;
}
}
if (read1 != null) {
lastSize = read1.get();
totalbytes += lastSize;
final int size2 = read2.get();
if (lastSize != size2) {
return false;
}
}
isEven = !isEven;
if (isEven) {
if (read1 != null) {
isEqual = executor.submit(oddEqualsArray);
}
read1 = executor.submit(read1Even);
read2 = executor.submit(read2Even);
} else {
if (read1 != null) {
isEqual = executor.submit(evenEqualsArray);
}
read1 = executor.submit(read1Odd);
read2 = executor.submit(read2Odd);
}
}
}
private boolean isEqualsNio(int myBufferSize) throws IOException {
FileChannel first = null, seconde = null;
try {
first = new FileInputStream(fileName1).getChannel();
seconde = new FileInputStream(fileName2).getChannel();
if (first.size() != seconde.size()) {
return false;
}
ByteBuffer firstBuffer = ByteBuffer.allocateDirect(myBufferSize);
ByteBuffer secondBuffer = ByteBuffer.allocateDirect(myBufferSize);
int firstRead, secondRead;
while (first.position() < first.size()) {
firstRead = first.read(firstBuffer);
totalbytes += firstRead;
secondRead = seconde.read(secondBuffer);
if (firstRead != secondRead) {
return false;
}
if (!nioBuffersEqual(firstBuffer, secondBuffer, firstRead)) {
return false;
}
}
return true;
} finally {
if (first != null) {
first.close();
}
if (seconde != null) {
seconde.close();
}
}
}
private static boolean nioBuffersEqual(ByteBuffer first, ByteBuffer second, final int length) {
if (first.limit() != second.limit() || length > first.limit()) {
return false;
}
first.rewind();
second.rewind();
for (int i = 0; i < length; i++) {
if (first.get() != second.get()) {
return false;
}
}
return true;
}
private void printAfterEquals(int myBufferSize) {
NumberFormat nf = new DecimalFormat("#.00");
final long dur = System.currentTimeMillis() - start;
double seconds = dur / 1000d;
double megabytes = totalbytes / 1024 / 1024;
double rate = (megabytes) / seconds;
System.out.println("I was equal, even after " + totalbytes
+ " bytes and reading for " + dur
+ " ms (" + nf.format(rate) + "MB/sec * 2)" +
" with a buffer size of " + myBufferSize / 1024 + " kB");
}
}