Hi all,
I have the following already sorted data:
AAA
AAA
TCG
TTT
TTT
TTT
I want to count the occurrence of each string yielding
AAA 2
TCG 1
TTT 3
I know I can do that with "uniq -c", but here I need to do extra processing on the overall C++ code I have.
I am stuck with this construct (modified according to 'pgras' suggestion)
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
using namespace std;
int main ( int arg_count, char *arg_vec[] ) {
if (arg_count !=2 ) {
cerr << "expected one argument" << endl;
return EXIT_FAILURE;
}
string line;
ifstream myfile (arg_vec[1]);
if (myfile.is_open())
{
int count;
string lastTag = "";
while (getline(myfile,line) )
{
stringstream ss(line);
string Tag;
ss >> Tag; // read first column
//cout << Tag << endl;
if (Tag != lastTag) {
lastTag = Tag;
count = 0;
}
else {
count++;
}
cout << lastTag << " " << count << endl;
}
cout << lastTag << " " << count << endl;
myfile.close();
}
else {cout << "Unable to open file";}
return 0;
}
It prints this instead:
AAA 0
AAA 1
TCT 0
TTT 0
TTT 1
TTT 2
TTT 2