Boost isn't magic. It'll just loop over that vector too. It depends on how efficient you need this to be; if it's not time intensive, then just write the simple loop.
(By the way, if you're wondering how to determine if a string contains another as a substring, use mystring.find(mysubstring)
.)
Edit: I may have been a little snarky with my first response, so I'll elaborate. If you're interested in efficiency, you can do it with one pass of the large string, which will be an improvement if the main string is much longer than the substrings.
The running time of the following is O(km + kn)
, where there are k
substrings of max length m
, and we're checking in a string of length n
(as opposed to the naive algorithm, which is O(kmn)
).
#include <cassert>
#include <list>
#include <map>
#include <string>
// store the substrings in a Trie data structure (http://en.wikipedia.org/wiki/Trie)
class Trie {
public:
friend class TrieCounter;
Trie(): m_parent(0) {}
~Trie() { for(Leaves::iterator it=m_leaves.begin();it!=m_leaves.end();++it) delete it->second; }
const Trie *add(const char *str) {
Leaves::iterator it = m_leaves.find(str[0]);
if(it == m_leaves.end())
it = m_leaves.insert(std::make_pair(str[0], new Trie(this))).first;
if(!str[0])
return it->second;
return it->second->add(str + 1);
}
const Trie *get(char c) const {
Leaves::const_iterator it = m_leaves.find(c);
if(it == m_leaves.end())
return 0;
return it->second;
}
bool is_leaf() const { return m_leaves.empty(); }
bool is_end_of_word() const { return m_leaves.find(0) != m_leaves.end(); }
private:
Trie(Trie *parent): m_parent(parent) {}
private:
Trie *m_parent;
typedef std::map<char, Trie*> Leaves;
Leaves m_leaves;
};
// a little counter helper class
class TrieCounter {
public:
TrieCounter(const Trie& root): m_wordCount(0) { add(root); }
unsigned word_count() const { return m_wordCount; }
void use(const Trie& trie) {
m_wordCount++;
decr(trie);
}
private:
void add(const Trie& trie) {
if(trie.is_leaf())
return;
m_count[&trie] = trie.m_leaves.size();
for(Trie::Leaves::const_iterator it=trie.m_leaves.begin();it!=trie.m_leaves.end();++it)
add(*it->second);
}
void decr(const Trie& trie) {
Count::iterator it = m_count.find(&trie);
assert(it != m_count.end());
assert(it->second > 0);
it->second--;
if(it->second == 0)
decr(*it->first->m_parent);
}
private:
typedef std::map<const Trie*, unsigned> Count;
Count m_count;
unsigned m_wordCount;
};
// and the list of substrings
class WordList {
public:
WordList() {}
void add(const std::string& str) { m_wordEnds.push_back(m_trie.add(str.c_str())); }
unsigned size() const { return m_wordEnds.size(); }
const Trie& root() const { return m_trie; }
private:
Trie m_trie;
typedef std::list<const Trie *> Tries;
Tries m_wordEnds;
};
unsigned count_in(const WordList& wordList, const std::string& str) {
typedef std::list<const Trie *> Tries;
typedef Tries::iterator Iter;
TrieCounter counter(wordList.root());
Tries curChecking;
for(unsigned i=0;i<str.size();i++) {
const char c = str[i];
curChecking.push_back(&m_wordList.root());
Iter it = curChecking.begin();
while(it != curChecking.end()) {
Iter next = ++Iter(it);
if(const Trie *child = (*it)->get(c)) {
*it = child;
if(child->is_end_of_word())
counter.use(*child);
} else {
curChecking.erase(it);
}
it = next;
}
}
return counter.word_count();
}
First set up your substrings:
WordList wordList;
wordList.add("foo");
wordList.add("lan");
wordList.add("found");
wordList.add("under");
wordList.add("next");
wordList.add("land");
wordList.add("new");
wordList.add("news");
wordList.add("on");
wordList.add("and");
and then
std::cout << count_in(wordList, "newfoundland") << "\n";