This question could actually be separated into two parts:
- How should I manage memory for flat array data?
- How should I access elements of a flat array?
I personally prefer to use std::vector for managing memory, except in cases where I need to maintain compatibility with code that doesn't use the STL (e.g. when interfacing with straight C code). It's much harder to write exception-safe code with raw arrays allocated via new or malloc (in part because it's really easy to forget that you need to worry about it). See any article on RAII for the reasons.
In practice, std::vector is implemented as a flat array. As such, it's always possible to pull out the raw array and use C-style access patterns. I typically start with the vector subscript operator syntax. With some compilers, debug builds of std::vector perform automatic bounds checking. This is slow (often a 10x slowdown for tight loops), but helpful in finding certain types of bugs.
If profiling on a particular platform indicates that the operator[] is a bottleneck, then I switch to directly accessing the raw array. Interestingly, depending on the compiler and OS, it can sometimes be faster to use an STL vector than a raw array.
Here are some results from a simple test application. It was compiled with Visual Studio 2008 in 32-bit Release mode using /O2 optimizations and run on Vista x64. Similar results are achieved with a 64-bit test application.
Binary search...
fill vector (for reference) : 0.27 s
array with ptr math : 0.38 s <-- C-style pointers lose
array with int index : 0.23 s <-- [] on raw array wins
array with ptrdiff_t index : 0.24 s
vector with int index : 0.30 s <-- small penalty for vector abstraction
vector with ptrdiff_t index : 0.30 s
Counting memory (de)allocation...
memset (for reference) : 2.85 s
fill malloc-ed raw array with [] : 2.66 s
fill malloc-ed raw array with ptr : 2.81 s
fill new-ed raw array with [] : 2.64 s
fill new-ed raw array with ptr : 2.65 s
fill vector as array : 3.06 s \ something's slower
fill vector : 3.05 s / with vector!
NOT counting memory (de)allocation...
memset (for reference) : 2.57 s
fill malloc-ed raw array with [] : 2.86 s
fill malloc-ed raw array with ptr : 2.60 s
fill new-ed raw array with [] : 2.63 s
fill new-ed raw array with ptr : 2.78 s
fill vector as array : 2.49 s \ after discounting the
fill vector : 2.54 s / (de)allocation vector is faster!
Code:
// WIN32_LEAN_AND_MEAN is the macro <windows.h> actually checks in order to
// skip its rarely used sub-headers; "WINDOWS_LEAN_AND_MEAN" has no effect.
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <stddef.h>  // ptrdiff_t, size_t
#include <stdio.h>   // printf
#include <stdlib.h>  // malloc, free
#include <string.h>  // memset
#include <string>
#include <vector>
using namespace std;

// Performance-counter ticks per second. Filled in by main() before any Timer
// is created; Timer divides tick deltas by this value to print seconds.
__int64 freq; // initialized in main

// Number of ints in every benchmark buffer.
int const N = 1024*1024*1024/sizeof(int)/2; // 1/2 GB of data

// How many times each fill benchmark rewrites its whole buffer.
int const nIter = 10;
// Scoped stopwatch for the benchmarks below.
//
// Construction samples the performance counter; destruction samples it again
// and prints one line: the label right-aligned in 36 columns followed by the
// elapsed time in seconds. Seconds are computed from the global `freq`, which
// must already hold the counter frequency.
class Timer {
public:
    // name - label printed next to the measurement. The text is copied, so
    //        the caller's buffer does not need to outlive the Timer. Taken as
    //        pointer-to-const so string literals can be passed legally.
    explicit Timer(char const *name) : name(name) {
        QueryPerformanceCounter((LARGE_INTEGER*)&start);
    }

    // Prints "<label> : <seconds> s" for the interval since construction.
    ~Timer() {
        __int64 stop;
        QueryPerformanceCounter((LARGE_INTEGER*)&stop);
        printf(" %36s : % 4.2f s\n", name.c_str(), (stop - start)/double(freq));
    }

private:
    // Non-copyable (declared, never defined): a copy would report the same
    // interval a second time when it is destroyed.
    Timer(Timer const &);
    Timer &operator=(Timer const &);

    string const name;  // label for the output line
    __int64 start;      // counter value sampled in the constructor
};
// Binary search over a sorted sequence accessed through operator[].
//
// Container   - any type whose operator[](Index) yields values comparable with
//               int. It is taken BY VALUE, so callers name a pointer or a
//               reference type explicitly (e.g. `int const *` or
//               `vector<int> const &`) to avoid copying a whole container.
// sortedArray - the sequence; must be sorted ascending over [first, last].
// first, last - INCLUSIVE index bounds of the range to search. A range with
//               last < first is treated as empty.
// key         - value to look for.
//
// Returns the index (narrowed to int) of an element equal to key, or 0 when
// key is not present. Because 0 is also a valid index, a miss cannot be told
// apart from a hit at index 0 -- use "(Index)-1" in real code.
template <typename Container, typename Index>
int binarySearch_indexed(Container sortedArray, Index first, Index last, int key) {
    if (last < first) return 0;  // empty range: report "not found"

    // Work on the half-open range [lo, hi). The upper bound then only ever
    // moves down to `mid`, never to `mid - 1`, so an unsigned Index cannot
    // wrap around when the key is smaller than every element.
    Index lo = first;
    Index hi = last + 1;
    while (lo < hi) {
        // lo + (hi - lo) / 2 cannot overflow, unlike (lo + hi) / 2.
        Index const mid = lo + (hi - lo) / 2;
        if (key > sortedArray[mid]) lo = mid + 1;
        else if (key < sortedArray[mid]) hi = mid;
        else return static_cast<int>(mid);
    }
    return 0; // Use "(Index)-1" in real code
}
// Sentinel object that binarySearch_ptr() returns the address of when the key
// is absent, so callers can always dereference the result.
int Dummy = -1;

// Binary search over a sorted int array using pointers only.
//
// first - pointer to the first element of the range.
// last  - pointer to the LAST element of the range (inclusive, not one past
//         the end). A range with last < first is treated as empty.
// key   - value to look for.
//
// Returns a pointer to an element equal to key, or &Dummy when key is not
// present (no NULL checks: don't do this for real).
int const *binarySearch_ptr(int const *first, int const *last, int key) {
    if (last < first) return &Dummy;  // empty range

    // Half-open range [lo, hi): the upper bound only moves down to `mid`, so
    // no pointer before the start of the array is ever formed.
    int const *lo = first;
    int const *hi = last + 1;
    while (lo < hi) {
        // The midpoint is computed in ELEMENTS. Averaging the two addresses
        // as integers yields a pointer that is not a multiple of sizeof(int)
        // away from `first` whenever the bounds are an odd number of elements
        // apart, i.e. a misaligned read of bytes from two adjacent elements.
        int const *const mid = lo + (hi - lo) / 2;
        if (key > *mid) lo = mid + 1;
        else if (key < *mid) hi = mid;
        else return mid;
    }
    return &Dummy; // no NULL checks: don't do this for real
}
// Times several ways of filling an N-element int buffer nIter times,
// INCLUDING the cost of obtaining and releasing the buffer: each Timer is
// created before the allocation and destroyed after the release, so the
// duration it prints spans both.
//
// A failed malloc is reported on stderr and that fill is skipped (the original
// wrote through the NULL pointer); the timing line printed for that case is
// then meaningless. new[] and vector report failure by throwing.
void timeFillWithAlloc() {
    printf("Counting memory (de)allocation...\n");
    {
        Timer tt("memset (for reference)");
        int *data = static_cast<int*>(malloc(N*sizeof(int)));
        if (data) {
            for (int it=0; it<nIter; it++) memset(data, 0, N*sizeof(int));
            free(data);
        } else {
            fprintf(stderr, "malloc failed; the timing reported next is not meaningful\n");
        }
    }
    {
        Timer tt("fill malloc-ed raw array with []");
        int *data = static_cast<int*>(malloc(N*sizeof(int)));
        if (data) {
            for (int it=0; it<nIter; it++) for (size_t i=0; i<N; i++) data[i] = (int)i;
            free(data);
        } else {
            fprintf(stderr, "malloc failed; the timing reported next is not meaningful\n");
        }
    }
    {
        Timer tt("fill malloc-ed raw array with ptr");
        int *data = static_cast<int*>(malloc(N*sizeof(int)));
        if (data) {
            for (int it=0; it<nIter; it++) {
                int *d = data;  // restart at the beginning on every pass
                for (size_t i=0; i<N; i++) *d++ = (int)i;
            }
            free(data);
        } else {
            fprintf(stderr, "malloc failed; the timing reported next is not meaningful\n");
        }
    }
    {
        Timer tt("fill new-ed raw array with []");
        int *data = new int[N];
        for (int it=0; it<nIter; it++) for (size_t i=0; i<N; i++) data[i] = (int)i;
        delete [] data;
    }
    {
        Timer tt("fill new-ed raw array with ptr");
        int *data = new int[N];
        for (int it=0; it<nIter; it++) {
            int *d = data;
            for (size_t i=0; i<N; i++) *d++ = (int)i;
        }
        delete [] data;
    }
    {
        Timer tt("fill vector as array");
        // vector<int>(N) value-initializes all N elements, so constructing it
        // already writes the whole buffer once inside the timed region.
        vector<int> data(N);
        for (int it=0; it<nIter; it++) {
            int *d = &data[0];  // write through the vector's underlying array
            for (size_t i=0; i<N; i++) *d++ = (int)i;
        }
    }
    {
        Timer tt("fill vector");
        vector<int> data(N);
        for (int it=0; it<nIter; it++) for (size_t i=0; i<N; i++) data[i] = (int)i;
    }
    printf("\n");
}
// Times the same fill strategies as timeFillWithAlloc(), but EXCLUDING
// allocation and release: every buffer is obtained before its Timer is
// created and released after the Timer has been destroyed, so only the
// nIter fill passes are measured.
//
// A failed malloc is reported on stderr and that measurement is skipped
// (the original wrote through the NULL pointer). new[] and vector report
// failure by throwing.
void timeFillNoAlloc() {
    printf("NOT counting memory (de)allocation...\n");
    {
        int *data = static_cast<int*>(malloc(N*sizeof(int)));
        if (data) {
            Timer tt("memset (for reference)");
            for (int it=0; it<nIter; it++) memset(data, 0, N*sizeof(int));
        } else {
            fprintf(stderr, "malloc failed; skipping \"memset (for reference)\"\n");
        }
        free(data);  // free(NULL) is a no-op
    }
    {
        int *data = static_cast<int*>(malloc(N*sizeof(int)));
        if (data) {
            Timer tt("fill malloc-ed raw array with []");
            for (int it=0; it<nIter; it++) for (size_t i=0; i<N; i++) data[i] = (int)i;
        } else {
            fprintf(stderr, "malloc failed; skipping \"fill malloc-ed raw array with []\"\n");
        }
        free(data);
    }
    {
        int *data = static_cast<int*>(malloc(N*sizeof(int)));
        if (data) {
            Timer tt("fill malloc-ed raw array with ptr");
            for (int it=0; it<nIter; it++) {
                int *d = data;  // restart at the beginning on every pass
                for (size_t i=0; i<N; i++) *d++ = (int)i;
            }
        } else {
            fprintf(stderr, "malloc failed; skipping \"fill malloc-ed raw array with ptr\"\n");
        }
        free(data);
    }
    {
        int *data = new int[N];
        {
            Timer tt("fill new-ed raw array with []");
            for (int it=0; it<nIter; it++) for (size_t i=0; i<N; i++) data[i] = (int)i;
        }
        delete [] data;
    }
    {
        int *data = new int[N];
        {
            Timer tt("fill new-ed raw array with ptr");
            for (int it=0; it<nIter; it++) {
                int *d = data;
                for (size_t i=0; i<N; i++) *d++ = (int)i;
            }
        }
        delete [] data;
    }
    {
        // Constructed (and therefore value-initialized) before timing starts.
        vector<int> data(N);
        {
            Timer tt("fill vector as array");
            for (int it=0; it<nIter; it++) {
                int *d = &data[0];  // write through the vector's underlying array
                for (size_t i=0; i<N; i++) *d++ = (int)i;
            }
        }
    }
    {
        vector<int> data(N);
        {
            Timer tt("fill vector");
            for (int it=0; it<nIter; it++) for (size_t i=0; i<N; i++) data[i] = (int)i;
        }
    }
    printf("\n");
}
// Times 2,000,000 binary searches (keys -1000000 .. 999999) over a sorted
// N-element vector holding 0 .. N-1, once per access style.
//
// Differences from the naive version, all needed for a fair measurement:
//  - every variant searches for the loop key `i` (the indexed variants used
//    to search for the constant -1, i.e. a different workload);
//  - the upper bound passed is the index/address of the LAST element, because
//    both search functions take inclusive bounds (size() is one past it);
//  - the checksum is 64-bit, since the found values add up to far more than
//    an int can hold;
//  - the checksum is stored to a volatile so the optimizer cannot discard
//    the searches as dead code.
void timeBinarySearch() {
    printf("Binary search...\n");
    vector<int> data(N);
    {
        Timer tt("fill vector (for reference)");
        for (size_t i=0; i<N; i++) data[i] = (int)i;
    }

    int const *const base = &data[0];
    int const lastInt = (int)data.size() - 1;
    ptrdiff_t const lastPtrdiff = (ptrdiff_t)data.size() - 1;
    volatile long long sink = 0;  // keeps each checksum observable

    {
        Timer tt("array with ptr math");
        long long sum = 0;
        for (int i=-1000000; i<1000000; i++) {
            sum += *binarySearch_ptr(base, base + lastPtrdiff, i);
        }
        sink = sum;
    }
    {
        Timer tt("array with int index");
        long long sum = 0;
        for (int i=-1000000; i<1000000; i++) {
            sum += data[binarySearch_indexed<int const *, int>(
                base, 0, lastInt, i)];
        }
        sink = sum;
    }
    {
        Timer tt("array with ptrdiff_t index");
        long long sum = 0;
        for (int i=-1000000; i<1000000; i++) {
            sum += data[binarySearch_indexed<int const *, ptrdiff_t>(
                base, 0, lastPtrdiff, i)];
        }
        sink = sum;
    }
    {
        Timer tt("vector with int index");
        long long sum = 0;
        for (int i=-1000000; i<1000000; i++) {
            sum += data[binarySearch_indexed<vector<int> const &, int>(
                data, 0, lastInt, i)];
        }
        sink = sum;
    }
    {
        Timer tt("vector with ptrdiff_t index");
        long long sum = 0;
        for (int i=-1000000; i<1000000; i++) {
            sum += data[binarySearch_indexed<vector<int> const &, ptrdiff_t>(
                data, 0, lastPtrdiff, i)];
        }
        sink = sum;
    }
    printf("\n");
}
int main(int argc, char **argv)
{
QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
timeBinarySearch();
timeFillWithAlloc();
timeFillNoAlloc();
return 0;
}