Is there any known data structure that provides O(1) random access, without using a contiguous block of memory of size O(N) or greater? This was inspired by this answer and is being asked for curiosity's sake rather than for any specific practical use case, though it might hypothetically be useful in cases of a severely fragmented heap.
You can use a trie where the length of the key is bounded. Since lookup in a trie with a key of length m is O(m), bounding the length of the keys bounds m, and lookup becomes O(1).
So think of a trie where the keys are strings over the alphabet { 0, 1 } (i.e., we treat each key as the binary representation of an integer). If we bound the length of the keys to, say, 32 letters, we have a structure that we can think of as being indexed by 32-bit integers and that is randomly accessible in O(1) time. For example, index 5 has binary representation 101, so its lookup walks exactly three child pointers from the root: 1, then 0, then 1.
Here is an implementation in C#:
using System;

class TrieArray<T> {
    TrieArrayNode<T> _root;

    public TrieArray(int length) {
        this.Length = length;
        _root = new TrieArrayNode<T>();
        // Pre-build the path for every valid index so lookups never miss.
        for (int i = 0; i < length; i++) {
            Insert(i);
        }
    }

    TrieArrayNode<T> Insert(int n) {
        return Insert(IntToBinaryString(n));
    }

    TrieArrayNode<T> Insert(string s) {
        TrieArrayNode<T> node = _root;
        foreach (char c in s.ToCharArray()) {
            node = Insert(c, node);
        }
        return node;
    }

    TrieArrayNode<T> Insert(char c, TrieArrayNode<T> node) {
        if (node.Contains(c)) {
            return node.GetChild(c);
        }
        else {
            TrieArrayNode<T> child = new TrieArrayNode<T>();
            node.Nodes[GetIndex(c)] = child;
            return child;
        }
    }

    internal static int GetIndex(char c) {
        return (int)(c - '0');
    }

    static string IntToBinaryString(int n) {
        return Convert.ToString(n, 2);
    }

    public int Length { get; private set; }

    TrieArrayNode<T> Find(int n) {
        return Find(IntToBinaryString(n));
    }

    TrieArrayNode<T> Find(string s) {
        TrieArrayNode<T> node = _root;
        foreach (char c in s.ToCharArray()) {
            node = Find(c, node);
        }
        return node;
    }

    TrieArrayNode<T> Find(char c, TrieArrayNode<T> node) {
        if (node.Contains(c)) {
            return node.GetChild(c);
        }
        else {
            throw new InvalidOperationException();
        }
    }

    public T this[int index] {
        get {
            CheckIndex(index);
            return Find(index).Value;
        }
        set {
            CheckIndex(index);
            Find(index).Value = value;
        }
    }

    void CheckIndex(int index) {
        if (index < 0 || index >= this.Length) {
            throw new ArgumentOutOfRangeException("index");
        }
    }

    class TrieArrayNode<TNested> {
        public TrieArrayNode<TNested>[] Nodes { get; set; }
        public T Value { get; set; }

        public TrieArrayNode() {
            Nodes = new TrieArrayNode<TNested>[2]; // binary alphabet: '0' and '1'
        }

        public bool Contains(char c) {
            return Nodes[TrieArray<TNested>.GetIndex(c)] != null;
        }

        public TrieArrayNode<TNested> GetChild(char c) {
            return Nodes[TrieArray<TNested>.GetIndex(c)];
        }
    }
}
Here is sample usage:
class Program {
    static void Main(string[] args) {
        int length = 10;
        TrieArray<int> array = new TrieArray<int>(length);
        for (int i = 0; i < length; i++) {
            array[i] = i * i;
        }
        for (int i = 0; i < length; i++) {
            Console.WriteLine(array[i]);
        }
    }
}
Yes, here's an example in C++:
#include <cstddef>
#include <vector>

template<class T>
struct Deque {
    struct Block {
        enum {
            B = 4*1024 / sizeof(T), // use any strategy you want;
                                    // this gives you ~4KiB blocks
            length = B
        };
        T data[length];
    };
    std::vector<Block*> blocks; // one pointer per block: N/B pointers, not N elements
    T& operator[](std::size_t n) {
        return blocks[n / Block::length]->data[n % Block::length]; // O(1)
    }
    // many things left out for clarity and brevity
};
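The growth path is one of the things left out; here is a minimal sketch of what push_back could look like, assuming an extra element counter. The derived struct and the size_ member are my names for illustration, not part of the code above:

// Minimal sketch, not part of the answer above: Deque<T> plus an assumed
// element counter. A fixed-size block is allocated every Block::length
// insertions and existing elements never move, so push_back is amortized O(1).
template<class T>
struct GrowableDeque : Deque<T> {
    std::size_t size_ = 0; // hypothetical member tracking the element count
    void push_back(const T& value) {
        if (size_ % Deque<T>::Block::length == 0)
            this->blocks.push_back(new typename Deque<T>::Block());
        (*this)[size_++] = value; // reuse the O(1) operator[]
        // (freeing the blocks is omitted, like the rest of the sketch)
    }
};

push_front is where this scheme gets awkward: prepending means either shifting every pointer in blocks or offsetting all the index arithmetic, which is the O(n) cost noted next.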
The main difference from std::deque is that this has O(n) push_front instead of O(1); in fact, there's a bit of a problem implementing std::deque so that it has all of:
- O(1) push_front
- O(1) push_back
- O(1) op[]
Perhaps I misinterpreted "without using a contiguous block of memory of size O(N) or greater", which seems awkward. Could you clarify what you want? I've interpreted it as "no single allocation that contains one item for every item in the represented sequence", which would be helpful for avoiding large allocations. (Even though I do have a single allocation of size N/B for the vector.)
If my answer doesn't fit your definition, then nothing will, unless you artificially limit the container's max size. (I can limit you to LONG_MAX items, store the above blocks in a tree instead, and call that O(1) lookup, for example.)
Well, since I've spent time thinking about it, and it could be argued that all hashtables are either a contiguous block of size >N or have a bucket list proportional to N, and Roger's top-level array of Blocks is O(N) with a coefficient less than 1, and I proposed a fix to that in the comments to his question, here goes:
#include <algorithm>
#include <cstddef>
#include <vector>
using std::max;
using std::ptrdiff_t;
using std::size_t;
using std::vector;

int magnitude( size_t x ) { // many platforms have an insn for this
    int m = 0; // declared outside the loop so it can be returned
    for ( ; x >>= 1; ++ m ) ; // returns 0 for input 0 or 1
    return m;
}

template< class T >
struct half_power_deque {
    vector< vector< T > > blocks; // max log(N) blocks of increasing size
    int half_first_block_mag;     // blocks one, two have same size >= 2

    T &operator[]( size_t index ) {
        int index_magnitude = magnitude( index );
        size_t block_index = max( 0, index_magnitude - half_first_block_mag );
        vector< T > &block = blocks[ block_index ];
        size_t elem_index = index;
        // strip the leading 1 bit to get the position within the block
        if ( block_index != 0 ) elem_index &= ( size_t( 1 ) << index_magnitude ) - 1;
        return block[ elem_index ];
    }
};
template< class T >
struct power_deque {
    half_power_deque< T > forward, backward;
    ptrdiff_t begin_offset; // == - backward.size(), or indexes into forward

    T &operator[]( size_t index ) {
        ptrdiff_t real_offset = index + begin_offset;
        if ( real_offset < 0 ) return backward[ - real_offset - 1 ];
        return forward[ real_offset ];
    }
};
half_power_deque implements erasing all but the last block, altering half_first_block_mag appropriately. This allows O(max over time N) memory use, amortized O(1) insertions on both ends, never invalidating references, and O(1) lookup.
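The insertion path isn't shown above; as a minimal sketch of how it could work, building on the structs above. The derived struct, the count member, and the choice half_first_block_mag = 1 are my assumptions, not part of the answer:

// Minimal sketch, not from the answer: track an element count and grow by
// opening a new block whose size doubles each time. Reserving each block's
// full capacity up front means elements are never moved later, so references
// handed out stay valid.
template< class T >
struct half_power_deque_with_push : half_power_deque< T > {
    size_t count; // hypothetical member: number of elements stored so far
    half_power_deque_with_push() : count( 0 ) {
        this->half_first_block_mag = 1; // first two blocks hold 4 elements each
    }
    void push_back( const T &value ) {
        int mag = magnitude( count );
        size_t block_index = max( 0, mag - this->half_first_block_mag );
        if ( block_index >= this->blocks.size() ) {
            size_t block_size = size_t( 1 ) << ( block_index == 0
                ? this->half_first_block_mag + 1
                : mag );
            this->blocks.push_back( vector< T >() );
            this->blocks.back().reserve( block_size );
        }
        this->blocks[ block_index ].push_back( value ); // amortized O(1)
        ++ count;
    }
};

A mirrored push on the backward half then gives power_deque its amortized O(1) insertions at both ends.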