Here's my complete, tested, version, using a std::tr1::unordered_map
.
I make this approximately O(n). Firstly it iterates through the n input values to insert/update the counts in the unordered_map
, then it does a partial_sort_copy
which is O(n). 2*O(n) ~= O(n).
#include <unordered_map>
#include <vector>
#include <algorithm>
#include <iostream>
namespace {
// Only used in most_frequent but can't be a local class because of the member template
struct second_greater {
// Need to compare two (slightly) different types of pairs
template <typename PairA, typename PairB>
bool operator() (const PairA& a, const PairB& b) const
{ return a.second > b.second; }
};
}
template <typename Iter>
std::pair<typename std::iterator_traits<Iter>::value_type, unsigned int>
most_frequent(Iter begin, Iter end)
{
typedef typename std::iterator_traits<Iter>::value_type value_type;
typedef std::pair<value_type, unsigned int> result_type;
std::tr1::unordered_map<value_type, unsigned int> counts;
for(; begin != end; ++begin)
// This is safe because new entries in the map are defined to be initialized to 0 for
// built-in numeric types - no need to initialize them first
++ counts[*begin];
// Only need the top one at this point (could easily expand to top-n)
std::vector<result_type> top(1);
std::partial_sort_copy(counts.begin(), counts.end(),
top.begin(), top.end(), second_greater());
return top.front();
}
int main(int argc, char* argv[])
{
int a[] = { 2, 456, 34, 3456, 2, 435, 2, 456, 2 };
std::pair<int, unsigned int> m = most_frequent(a, a + (sizeof(a) / sizeof(a[0])));
std::cout << "most common = " << m.first << " (" << m.second << " instances)" << std::endl;
assert(m.first == 2);
assert(m.second == 4);
return 0;
}