views:

157

answers:

2

Dear all,

I have data that looks like this:

>day11:1:356617
ACTTCTGATTCTGACAGACTCAGGAAGAAACCAT
>day11:2:283282
CTCAGCCCGTAGCCCGTCGGTTCCGGAGTAAGTT
>day11:3:205058
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
>day11:4:202520
AGTTCGATCGGTAGCGGGAGCGGAGAGCGGACCC
>day11:5:107099
AGGCATTCAGGCAGCGAGAGCAGAGCAGCGTAGA
>day11:6:106715
CTCTTTGCCCCATCTACTGCGAGGATGAAGACCA

What I want to do is create a map, with each line starting with ">" as the key and the ACGT sequence on the following line as the value.

However, the code below doesn't work. The map fails to capture the values as I expected.

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <map>
// Attempts to read "mydata.txt" into a map from header (text after '>') to
// the sequence on the following line, then prints every (key, value) pair.
//
// NOTE(review): as posted, `line` is never declared and no
// `using namespace std;` is visible in this snippet — presumably lost when
// the code was pasted.
int main() {

     ifstream myfile ("mydata.txt");

    map <string,string>FastaMap;

    cerr << "Read Fasta File to Map" << endl;

    if (myfile.is_open())
    {
        while (getline(myfile,line) )
        {
            stringstream ss(line);
            // These three are re-created on every iteration, so nothing is
            // remembered from the previous line: on a header line Tag is
            // always empty, on a sequence line Header is always empty.
            string Fasta;
            string Header = "";
            string Tag = "";

            ss >> Fasta; // read first column

            if ( Fasta[0] == '>') {
                 // get header only (drop the leading '>')
                 Header = Fasta.substr(1);
                 //cerr << Header << endl;
            }
            else {
                Tag = Fasta;
            }


            // With `||` this fires on every non-empty line: a header line
            // stores FastaMap[header] = "" and a sequence line stores
            // FastaMap[""] = sequence, so headers never get their sequence.
            if (Header != "" || Tag != "") {
                FastaMap[Header] = Tag;
                //cout << "TAG: " << Tag << endl;
                //cout << "Head: " << Header << endl;
                // FastaMap.insert(make_pair(Header,Tag));
           }
        }
        myfile.close();
    }
    else  {
        cout << "Unable to open file";
    }

    // This doesn't print the second value, only prints the first

    // Dump the map in key order.
    for (map<string,string>::iterator it = FastaMap.begin(); it!=
            FastaMap.end(); it++) {
         cout << "Head: " << (*it).first << ", End: " << (*it).second << endl;
    }

}

The expected output is:

Head: day11:1:356617, End: ACTTCTGATTCTGACAGACTCAGGAAGAAACCAT
Head: day11:2:283282, End: CTCAGCCCGTAGCCCGTCGGTTCCGGAGTAAGTT
...etc...
+1  A: 

There is a bug in the if condition: if (Header != "" || Tag != "") should be: if (Header != "" && Tag != "")

Even more:

// Store a record only once both the header and its sequence are known, then
// reset both so the next record starts from a clean state.
// NOTE(review): this can only ever be true if Header and Tag keep their
// values across loop iterations; in the question's code they are declared
// inside the while loop — confirm they are moved outside it.
if (Header != "" && Tag != "") {
                    FastaMap[Header] = Tag;
                    Header = "";
                    Tag = "";
}
Drakosha
+4  A: 

You're resetting Fasta, Header and Tag on every loop iteration. What you have to do is:

  1. Declare those variables outside the while (just before it)
  2. Change the if (Header != "" || Tag != "") line to using && instead of || (there is a logic error there)
  3. Reset the Tag and Header variables when you add them to the map.

The correct code follows:

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <map>
using namespace std;
// Reads the FASTA-like file "test" into a map keyed by header (the text
// after '>') whose value is the first token of the sequence line that
// follows it, then prints every pair as "Head: <header>, End: <sequence>".
//
// Returns 0 on success, 1 if the file cannot be opened.
int main() {
    ifstream myfile("test");

    map<string, string> FastaMap;

    cerr << "Read Fasta File to Map" << endl;

    if (!myfile.is_open()) {
        // Report on stderr and fail the process so callers can detect it.
        cerr << "Unable to open file" << endl;
        return 1;
    }

    // A record spans two lines (header, then sequence), so Header and Tag
    // must be declared outside the loop to survive from one line to the next.
    string line;
    string Fasta;
    string Header = "";
    string Tag = "";
    while (getline(myfile, line)) {
        stringstream ss(line);

        // On a blank (or whitespace-only) line the extraction fails and
        // leaves Fasta untouched. Clear it first so the previous line's
        // token is not processed a second time.
        Fasta.clear();
        ss >> Fasta; // read first column
        if (Fasta.empty()) {
            continue;
        }

        if (Fasta[0] == '>') {
            // get header only (drop the leading '>')
            Header = Fasta.substr(1);
            // A new header starts a new record: drop any sequence that was
            // read without a header so it cannot be attached to this one.
            Tag = "";
        }
        else {
            Tag = Fasta;
        }

        // Store the record once both halves are known, then reset so the
        // next header/sequence pair starts clean.
        if (Header != "" && Tag != "") {
            FastaMap[Header] = Tag;
            Header = "";
            Tag = "";
        }
    }
    // myfile is closed by its destructor when main returns.

    // Print the records in key order.
    for (map<string, string>::iterator it = FastaMap.begin();
         it != FastaMap.end(); ++it) {
        cout << "Head: " << it->first << ", End: " << it->second << endl;
    }

    return 0;
}

Note that there are other possible enhancements to the code, but as it stands it works.

Diego Sevilla