I'm not sure if this is a good question or not — please close it if not.
I set out to write (using boost::coordinate_vector
as a starting point) a sparse_vector
template class that efficiently implements a vector-like interface, but is sparse. It implements all the usual vector operations and a fast sparse iterator that iterates over the set elements. It also has a fast version of rotate
. I need this class because I have a write-once-read-many use case, and I use many of these sparse_vectors
.
I have no experience writing template classes, so I know that there are things that I could be doing better. How can I improve this class?
// sparse_vector is a sparse version of std::vector. It stores index-value pairs, and a size, which
// represents the size of the sparse_vector.
#include <algorithm>
#include <ostream>
#include <iostream>
#include <vector>
#include <boost/iterator.hpp>
#include <boost/iterator/iterator_adaptor.hpp>
#include <boost/iterator/iterator_facade.hpp>
#include <boost/iterator/reverse_iterator.hpp>
#include <boost/optional.hpp>
#include <glog/logging.h>
template<typename T>
class sparse_vector {
public:
typedef T value_type;
typedef T* pointer;
typedef T& reference;
typedef const T& const_reference;
typedef size_t size_type;
private:
struct ElementType {
ElementType(size_type i, T const& t): index(i), value(t) {}
ElementType(size_type i, T&& t): index(i), value(move(t)) {}
ElementType(size_type i): index(i) {} // allows lower_bound to work
ElementType(ElementType const&) = default;
size_type index;
value_type value;
};
typedef std::vector<ElementType> array_type;
public:
typedef typename array_type::difference_type difference_type;
private:
typedef const T* const_pointer;
typedef sparse_vector<T> self_type;
typedef typename array_type::const_iterator const_subiterator_type;
typedef typename array_type::iterator subiterator_type;
struct IndexCompare {
bool operator()(ElementType const& a, ElementType const& b) { return a.index < b.index; }
};
public:
// Construction and destruction
inline sparse_vector(): size_(0), sorted_filled_(0), data_() {
storage_invariants(); }
explicit inline sparse_vector(size_type size): size_(size), sorted_filled_(0), data_() {
storage_invariants(); }
inline sparse_vector(const sparse_vector<T> &v):
size_(v.size_), sorted_filled_(v.sorted_filled_), data_(v.data_) {
storage_invariants(); }
inline sparse_vector(sparse_vector<T>&& v):
size_(v.size_), sorted_filled_(v.sorted_filled_), data_(move(v.data_)) {
storage_invariants(); }
sparse_vector(const optional<T> &o): size_(1), sorted_filled_(0) {
if (o) {
append_element(0, *o);
sorted_filled_ = 1;
}
storage_invariants();
}
sparse_vector(const std::vector<T> &v):
size_(v.size()), sorted_filled_(0) {
data_.reserve(v.size());
for (auto it = v.begin(); it != v.end(); ++it)
append_element(it - v.begin(), *it);
sorted_filled_ = v.size();
storage_invariants();
}
sparse_vector(const sparse_vector<optional<T>> &sv):
size_(sv.size()), sorted_filled_(0) {
for (auto it = sv.sparse_begin(); it != sv.sparse_end(); ++it)
if (*it)
append_element(it.index(), **it);
sorted_filled_ = data_.size();
storage_invariants();
}
sparse_vector(size_type size, vector<pair<size_type, T>> init_list):
size_(size), sorted_filled_(0) {
for (auto it = init_list.begin(); it != init_list.end(); ++it)
append_element(it->first, it->second);
// require that the list be sorted?
// sorted_filled_ = data_.size();
storage_invariants();
}
/*template<class AE>
inline sparse_vector(const sparse_vector<AE> &ae):
size_(ae().size()), sorted_filled_(ae.nnz()), data_() {
storage_invariants();
for (auto it = ae.sparse_begin(); it != ae.sparse_end(); ++it) {
(*this)[it.index()] = static_cast<T>(*it);
}
}*/
// Conversion operators
// Copy sparse_vector to a vector filling in uninitialized elements with T()
operator std::vector<T>() {
std::vector<T> retval(size_);
for (auto it = data_.begin(); it != data_.end(); ++it)
retval[it.index()] = *it;
return retval;
}
// Copy sparse_vector to a vector filling in uninitialized elements with a default value.
void CopyToVector(std::vector<T>* v, T const& default_value = T()) {
size_type last_index = 0;
for (auto it = sparse_begin(); it != sparse_end(); ++it) {
for (size_type i = last_index; i < it.index(); ++i) {
(*v)[i] = default_value;
}
(*v)[it.index()] = *it;
last_index = it.index() + 1;
}
}
// Accessors
inline size_type size() const {
return size_;
}
inline size_type nnz_capacity() const {
return data_.capacity();
}
inline size_type nnz() const {
return data_.size();
}
// Resizing
inline void resize(size_type size, bool preserve = true) {
if (preserve) {
sort(); // remove duplicate elements.
subiterator_type it(lower_bound(data_.begin(), data_.end(), size, IndexCompare()));
data_.erase(it, data_.end());
} else {
data_.clear();
}
size_ = size;
sorted_filled_ = nnz();
storage_invariants();
}
// Reserving
inline void reserve(size_type non_zeros, bool preserve = true) {
if (preserve)
sort(); // remove duplicate elements.
else
data_.clear();
data_.reserve(non_zeros);
sorted_filled_ = nnz();
storage_invariants();
}
// Element support
// find_element returns a pointer to element i or NULL -- it never creates an element
inline pointer find_element(size_type i) {
return const_cast<pointer> (const_cast<const self_type&>(*this).find_element(i)); }
inline const_pointer find_element(size_type i) const {
sort();
const_subiterator_type it(lower_bound(data_.begin(), data_.end(), i, IndexCompare()));
if (it == data_.end() || it->index != i)
return NULL;
return &it->value;
}
// find_element_optional returns a boost::optional<T>
inline boost::optional<value_type> find_element_optional(size_type i) const {
CHECK_LT(i, size_);
sort();
const_subiterator_type it(lower_bound(data_.begin(), data_.end(), i, IndexCompare()));
if (it == data_.end() || it->index != i)
return boost::optional<value_type>();
return boost::optional<value_type>(it->value);
}
// Element access
// operator[] returns a reference to element i -- the const version returns a reference to
// a zero_ element if it doesn't exist; the non-const version creates it in that case.
inline const_reference operator[] (size_type i) const {
CHECK_LT(i, size_);
sort();
const_subiterator_type it(lower_bound(data_.begin(), data_.end(), i, IndexCompare()));
if (it == data_.end() || it->index != i)
return zero_;
return it->value;
}
inline reference operator[] (size_type i) {
CHECK_LT(i, size_);
sort();
subiterator_type it(lower_bound(data_.begin(), data_.end(), i, IndexCompare()));
if (it == data_.end() || it->index != i)
return insert_element(i, value_type/*zero*/());
else
return it->value;
}
// Element assignment
inline void append_element(size_type i, const_reference t) {
data_.emplace_back(i, t);
storage_invariants();
}
inline void append_element(size_type i, T&& t) {
data_.emplace_back(i, move(t));
storage_invariants();
}
inline void sorted_append_element(size_type i, const_reference t) {
// must maintain sort order
CHECK(sorted());
if (data_.size() > 0)
CHECK_LT(data_.back().index, i);
data_.emplace_back(i, t);
sorted_filled_ = data_.size();
storage_invariants();
}
/* This function is probably not useful.
* inline void sparse_pop_back() {
// ISSUE invariants could be simpilfied if sorted required as precondition
CHECK_GT(data_.size(), 0);
data_.pop_back();
sorted_filled_ = (std::min)(sorted_filled_, data_.size());
storage_invariants();
}*/
inline reference insert_element(size_type i, const_reference t) {
DCHECK(find_element(i) == NULL); // duplicate element
append_element(i, t);
return data_.back().value;
}
// Element clearing
// TODO(neil): consider replacing size_type functions with iterator functions
// replaces elements in the range [i, i] with "zero" (by erasing them from the map)
inline void clear_element(size_type i) {
sort();
subiterator_type it(lower_bound(data_.begin(), data_.end(), i, IndexCompare()));
if (it == data_.end() || it->index != i)
return;
data_.erase(it);
--sorted_filled_;
storage_invariants();
}
// replaces elements in the range [first, last) with "zero" (by erasing them from the map)
inline void clear_elements(size_type first, size_type last) {
CHECK_LE(first, last);
sort();
subiterator_type it_first(lower_bound(data_.begin(), data_.end(), first, IndexCompare()));
subiterator_type it_last(lower_bound(data_.begin(), data_.end(), last, IndexCompare()));
sorted_filled_ -= it_last - it_first;
data_.erase(it_first, it_last);
storage_invariants();
}
// Zeroing
inline void clear() { data_.clear(); sorted_filled_ = 0; storage_invariants(); }
// Comparison
inline bool operator==(const sparse_vector &v) const {
if (this == &v)
return true;
if (size_ != v.size_)
return false;
auto it_this = sparse_begin();
for (auto it = v.sparse_begin(); it != v.sparse_end(); ++it) {
if (it_this == sparse_end()
|| it_this.index() != it.index()
|| *it_this != *it)
return false;
++it_this;
}
return true;
}
// Assignment
inline sparse_vector& operator=(const sparse_vector &v) {
if (this != &v) {
size_ = v.size_;
sorted_filled_ = v.sorted_filled_;
data_ = v.data_;
}
storage_invariants();
return *this;
}
inline sparse_vector &assign_temporary(sparse_vector &v) {
swap(v);
return *this;
}
template<class AE>
inline sparse_vector& operator=(const sparse_vector<AE> &ae) {
self_type temporary(ae);
return assign_temporary(temporary);
}
// Swapping
inline void swap(sparse_vector &v) {
if (this != &v) {
std::swap(size_, v.size_);
std::swap(sorted_filled_, v.sorted_filled_);
data_.swap(v.data_);
}
storage_invariants();
}
inline friend void swap(sparse_vector &v1, sparse_vector &v2) { v1.swap(v2); }
// Sorting and summation of duplicates
inline bool sorted() const { return sorted_filled_ == data_.size(); }
inline void sort() const {
if (!sorted() && data_.size() > 0) {
// sort new elements and merge
std::sort(data_.begin() + sorted_filled_, data_.end(), IndexCompare());
std::inplace_merge(data_.begin(), data_.begin() + sorted_filled_, data_.end(),
IndexCompare());
// check for duplicates
size_type filled = 0;
for(size_type i = 1; i < data_.size(); ++i) {
if (data_[filled].index != data_[i].index) {
++filled;
if (filled != i) {
data_[filled] = data_[i];
}
} else {
CHECK(false); // duplicates
// value_data_[filled] += value_data_[i];
}
}
++filled;
sorted_filled_ = filled;
data_.erase(data_.begin() + filled, data_.end());
storage_invariants();
}
}
// Back element insertion and erasure
inline void push_back(const_reference t) {
CHECK(sorted());
data_.emplace_back(size(), t);
if (sorted_filled_ == data_.size())
++sorted_filled_;
++size_;
storage_invariants();
}
inline void pop_back() {
CHECK_GT(size_, 0);
clear_element(size_ - 1);
if (sorted_filled_ == size_)
--sorted_filled_;
--size_;
storage_invariants();
}
// Iterator types
private:
template<typename base_type, typename iterator_value_type>
class sparse_iterator_private
: public boost::iterator_adaptor<
sparse_iterator_private<base_type, iterator_value_type>, // Derived
base_type, // Base
iterator_value_type, // Value
boost::random_access_traversal_tag // CategoryOrTraversal
> {
private:
struct enabler {}; // a private type avoids misuse
public:
sparse_iterator_private()
: sparse_iterator_private<base_type, iterator_value_type>::iterator_adaptor_() {}
explicit sparse_iterator_private(base_type&& p)
: sparse_iterator_private<base_type, iterator_value_type>::iterator_adaptor_(p) {}
size_type index() const { return this->base()->index; }
iterator_value_type& value() const { return this->base()->value; }
private:
friend class boost::iterator_core_access;
iterator_value_type& dereference() const { return this->base()->value; }
};
template<typename container_type, typename iterator_value_type>
class iterator_private
: public boost::iterator_facade<
iterator_private<container_type, iterator_value_type>, // Derived
iterator_value_type, // Value
boost::random_access_traversal_tag, // CategoryOrTraversal
iterator_value_type&, // Reference
difference_type // Difference
> {
private:
struct enabler {}; // a private type avoids misuse
public:
iterator_private(): container_(NULL), index_(0) {}
iterator_private(container_type* container, size_type index)
: container_(container), index_(index) {}
iterator_private(iterator_private const& other)
: container_(other.container_), index_(other.index_) {}
iterator_private& operator=(iterator_private const& other) {
container_ = other.container_;
index_ = other.index_;
}
container_type& container() const { return *container_; }
size_type index() const { return index_; }
iterator_value_type& value() const { return dereference(); }
/* TODO(neil): how do you make this work?
template<typename U>
sparse_iterator_private<U> subsequent_sparse_iterator() {
return sparse_iterator_private<T>(lower_bound(container_.data_.begin(),
container_.data_.end(),
index_, IndexCompare()));
}
*/
private:
friend class boost::iterator_core_access;
iterator_value_type& dereference() const { return (*container_)[index_]; }
bool equal(iterator_private<container_type, iterator_value_type> const& other) const {
return index_ == other.index_ && container_ == other.container_; }
void increment() { ++index_; }
void decrement() { --index_; }
void advance(int n) { index_ += n; }
difference_type distance_to(iterator_private<container_type,
iterator_value_type> const& other) {
return index_ - other.index_; }
container_type* container_;
size_type index_;
};
public:
typedef sparse_iterator_private<typename array_type::iterator, value_type> sparse_iterator;
typedef sparse_iterator_private<
typename array_type::const_iterator, const value_type> const_sparse_iterator;
typedef iterator_private<sparse_vector, value_type> iterator;
typedef iterator_private<const sparse_vector, const value_type> const_iterator;
typedef boost::reverse_iterator<sparse_iterator> reverse_sparse_iterator;
typedef boost::reverse_iterator<const_sparse_iterator> const_reverse_sparse_iterator;
typedef boost::reverse_iterator<iterator> reverse_iterator;
typedef boost::reverse_iterator<const_iterator> const_reverse_iterator;
// Element lookup
// inline This function seems to be big. So we do not let the compiler inline it.
sparse_iterator sparse_find(size_type i) {
sort();
return sparse_iterator(lower_bound(data_.begin(), data_.end(), i, IndexCompare()));
}
// inline This function seems to be big. So we do not let the compiler inline it.
const_sparse_iterator sparse_find(size_type i) const {
sort();
return const_sparse_iterator(lower_bound(data_.begin(), data_.end(), i, IndexCompare()));
}
// the nth non-zero element
const_sparse_iterator nth_nonzero(size_type i) const {
CHECK_LE(data_.size(), i);
sort();
return const_sparse_iterator(data_.begin() + i);
}
// Forward iterators
inline sparse_iterator sparse_begin() { return sparse_find(0); }
inline sparse_iterator sparse_end() { return sparse_find(size_); }
inline const_sparse_iterator sparse_begin() const { return sparse_find(0); }
inline const_sparse_iterator sparse_end() const { return sparse_find(size_); }
inline iterator begin() { return iterator(this, 0); }
inline iterator end() { return iterator(this, size()); }
inline const_iterator begin() const { return const_iterator(this, 0); }
inline const_iterator end() const { return const_iterator(this, size()); }
// Reverse iterators
inline reverse_sparse_iterator sparse_rbegin() {
return reverse_sparse_iterator(sparse_end()); }
inline reverse_sparse_iterator sparse_rend() {
return reverse_sparse_iterator(sparse_begin()); }
inline const_reverse_sparse_iterator sparse_rbegin() const {
return const_reverse_sparse_iterator(sparse_end()); }
inline const_reverse_sparse_iterator sparse_rend() const {
return const_reverse_sparse_iterator(sparse_begin()); }
inline reverse_iterator rbegin() {
return reverse_iterator(end()); }
inline reverse_iterator rend() {
return reverse_iterator(begin()); }
inline const_reverse_iterator rbegin() const {
return const_reverse_iterator(end()); }
inline const_reverse_iterator rend() const {
return const_reverse_iterator(begin()); }
// Mutating algorithms
// like vector::erase (erases the elements from the container and shrinks the container)
inline void erase(iterator const& first, iterator const& last) {
clear_elements(first.index(), last.index());
CHECK(sorted());
subiterator_type it_first(lower_bound(data_.begin(), data_.end(),
first.index(), IndexCompare()));
size_t n = last.index() - first.index();
for (auto it = it_first; it != data_.end(); ++it)
it->index -= n;
size_ -= n;
}
// like vector::insert (insert elements into the container and causes the container to grow)
inline void insert(iterator const& index, size_type n) {
sort();
subiterator_type it_first(lower_bound(data_.begin(), data_.end(),
index.index(), IndexCompare()));
for (auto it = it_first; it != data_.end(); ++it)
it->index += n;
size_ += n;
}
// Removes all "zero" elements
void remove_zeros() {
size_type filled = 0;
for(size_type i = 0; i < data_.size(); ++i) {
if (i == sorted_filled_) {
// Once we've processed sorted_filled_ items, we know that whatever we've filled so
// far is sorted.
CHECK_LE(filled, sorted_filled_);
// Since sorted_filled_ only shrinks here, and i only grows, we won't enter this
// condition twice.
sorted_filled_ = filled;
}
if (data_[i].value != value_type/*zero*/()) {
if (filled != i) {
data_[filled] = data_[i];
}
++filled;
}
}
data_.erase(data_.begin() + filled, data_.end());
storage_invariants();
}
void rotate(size_type first, size_type middle, size_type last) {
CHECK_LT(first, middle);
CHECK_LT(middle, last);
sort();
subiterator_type it_first(lower_bound(data_.begin(), data_.end(),
first, IndexCompare()));
subiterator_type it_middle(lower_bound(data_.begin(), data_.end(),
middle, IndexCompare()));
subiterator_type it_last(lower_bound(data_.begin(), data_.end(),
last, IndexCompare()));
for (auto it = it_first; it != it_middle; ++it)
it->index += last - middle;
for (auto it = it_middle; it != it_last; ++it)
it->index -= middle - first;
std::rotate(it_first, it_middle, it_last);
// array remains sorted
}
sparse_vector<value_type> concatenate(sparse_vector<value_type> const& other) const {
sparse_vector<value_type> retval(size() + other.size());
for (auto it = sparse_begin(); it != sparse_end(); ++it) {
retval[it.index()] = *it;
}
for (auto it = other.sparse_begin(); it != other.sparse_end(); ++it) {
retval[it.index() + size()] = *it;
}
return retval;
}
private:
void storage_invariants() const {
CHECK_LE(sorted_filled_, data_.size());
if (sorted())
CHECK_EQ(sorted_filled_, data_.size());
else
CHECK_LT(sorted_filled_, data_.size());
if (!data_.empty())
CHECK_LT(data_.back().index, size_);
for (size_type i = 1; i < sorted_filled_; ++i)
CHECK_GT(data_[i].index, data_[i - 1].index);
}
size_type size_;
mutable typename array_type::size_type sorted_filled_;
mutable array_type data_;
static const value_type zero_;
};
template<typename T>
const typename sparse_vector<T>::value_type sparse_vector<T>::zero_ = T();
template<typename T>
ostream& operator<<(ostream& os, const sparse_vector<T>& v) {
os << "[" << v.size() << ": ";
for (auto it = v.sparse_begin(); it != v.sparse_end(); ++it) {
os << "(" << it.index() << ": " << it.value() << ") ";
}
return os << "]";
}