I have a data set that looks like this:
000 100 200 300 010 020 030 001 002 003
001 101 201 301 011 021 031 000 002 003
002 102 202 302 012 022 032 001 000 003
003 103 203 303 013 023 033 001 002 000
010 110 210 310 000 020 030 011 012 013
020 120 220 320 010 000 030 021 022 023
030 130 230 330 010 020 000 031 032 033
033 133 233 333 013 023 003 031 032 030
100 000 200 300 110 120 130 101 102 103
133 033 233 333 113 123 103 131 132 130
200 100 000 300 210 220 230 201 202 203
233 133 033 333 213 223 203 231 232 230
300 100 200 000 310 320 330 301 302 303
303 103 203 003 313 323 333 301 302 300
313 113 213 013 303 323 333 311 312 310
330 130 230 030 310 320 300 331 332 333
331 131 231 031 311 321 301 330 332 333
332 132 232 032 312 322 302 331 330 333
333 133 233 033 313 323 303 331 332 330
What I intend to do is to generate list of unique strings from it, yielding:
000
001
002
003
010
011
012
013
020
021
022
023
030
031
032
033
100
101
102
103
110
113
120
123
130
131
132
133
200
201
202
203
210
213
220
223
230
231
232
233
300
301
302
303
310
311
312
313
320
321
322
323
330
331
332
333
The code I have to generate that is this. But it is very memory consumptive. Because in reality the string is of length >36 and there are more than 35 million lines in a file. Each line with >36*3 number of columns/entries. Is there a memory efficient way to do it?
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <map>
using namespace std;
int main ( int arg_count, char *arg_vec[] ) {
if (arg_count !=2 ) {
cerr << "expected one argument" << endl;
return EXIT_FAILURE;
}
string line;
ifstream myfile (arg_vec[1]);
map <string,int> Tags;
if (myfile.is_open())
{
while (getline(myfile,line) )
{
stringstream ss(line);
string Elem;
while (ss >> Elem) {
Tags[Elem] = 1;
}
}
myfile.close();
}
else { cout << "Unable to open file";}
for (map <string,int>::iterator iter = Tags.begin(); iter !=
Tags.end();iter++) {
cout << (*iter).first << endl;
}
return 0;
}