views:

134

answers:

1

I have the following code to read in text, store in a hashmap as bigrams (with other methods to sort them by frequency and do v. v. basic additive smoothing.

I had it working great for one language input file (english) and then I want to expand it for the second language input file (japanese - doens;t matter what it is I suppose) using the same methods but the Japanese bigram hashmap is printing out 3 times in a row with diff. values.

I've tried using diff text in the input file, making sure there are no gaps in text etc. I've also put print statements at certain places in the Japanese part of the code to see if I can get any clues but all the print statements are printing each time so I can't work out if it is looping at a certain place.

I have gone through it with a fine toothcomb but am obviously missing something and slowly going crazy here - any help would be appreciated. thanks in advance...

package languagerecognition2;
import java.lang.String;
import java.io.InputStreamReader;
import java.util.*;
import java.util.Iterator;
import java.util.List.*;
import java.util.ArrayList;
import java.util.AbstractMap.*;
import java.lang.Object;
import java.io.*;
import java.util.Enumeration;
import java.util.Arrays;
import java.lang.Math;


public class Main {    /**

    public static void main(String[] args) {

//training English -----------------------------------------------------------------
         File file = new File("english1.txt");
        StringBuffer contents = new StringBuffer();
        BufferedReader reader = null;

        try
        {
            reader = new BufferedReader(new FileReader(file));
            String test = null;
            //test = reader.readLine();
            // repeat until all lines are read
            while ((test = reader.readLine()) != null)       

           {          
             test = test.toLowerCase();
             char[] charArrayEng = test.toCharArray();
             HashMap<String, Integer> hashMapEng = new HashMap<String, Integer>(bigrams(charArrayEng));


             LinkedHashMap<String, Integer> sortedListEng = new LinkedHashMap<String, Integer>(sort(hashMapEng));


             int sizeEng=sortedListEng.size();
             System.out.println("Total count of English bigrams is " + sizeEng);
             LinkedHashMap<String, Integer> smoothedListEng = new LinkedHashMap<String, Integer>(smooth(sortedListEng, sizeEng));
                        //print linkedHashMap to check values
                         Set set= smoothedListEng.entrySet();
                         Iterator iter = set.iterator (  ) ;
                         System.out.println("Beginning English");

                         while ( iter.hasNext())
                           {
                           Map.Entry entry =  ( Map.Entry ) iter.next (  ) ;
                           Object key = entry.getKey (  ) ;
                           Object value = entry.getValue (  ) ;

                           System.out.println( key+" : " +   value);

                          }
                        System.out.println("End English");
          }//end while

        }//end try
        catch (FileNotFoundException e)
        {
            e.printStackTrace();
        } catch (IOException e)
        {
            e.printStackTrace();
        } finally
        {
            try
            {
                if (reader != null)
                {
                    reader.close();
                }
            } catch (IOException e)
            {
                e.printStackTrace();
            }
        }


//End training English-----------------------------------------------------------

//Training japanese--------------------------------------------------------------

        File file2 = new File("japanese1.txt");
        StringBuffer contents2 = new StringBuffer();
        BufferedReader reader2 = null;

        try
        {
            reader2 = new BufferedReader(new FileReader(file2));
            String test2 = null;

             //repeat until all lines are read
            while ((test2 = reader2.readLine()) != null)
           {

             test2 = test2.toLowerCase();
             char[] charArrayJap = test2.toCharArray();

             HashMap<String, Integer> hashMapJap = new HashMap<String, Integer>(bigrams(charArrayJap));
             //System.out.println( "bigrams stage");

             LinkedHashMap<String, Integer> sortedListJap = new LinkedHashMap<String, Integer>(sort(hashMapJap));
             //System.out.println( "sort stage");

             int sizeJap=sortedListJap.size();
             //System.out.println("Total count of Japanese bigrams is " + sizeJap);

             LinkedHashMap<String, Integer> smoothedListJap = new LinkedHashMap<String, Integer>(smooth(sortedListJap, sizeJap));
                    System.out.println( "smooth stage");
             //print linkedHashMap to check values

                        Set set2= smoothedListJap.entrySet();
                         Iterator iter2 = set2.iterator();
                         System.out.println("Beginning Japanese");

                         while ( iter2.hasNext())
                           {
                           Map.Entry entry2 =  ( Map.Entry ) iter2.next (  ) ;
                           Object key = entry2.getKey (  ) ;
                           Object value = entry2.getValue (  ) ;

                           System.out.println( key+" : " +   value);
                           }//end while
                        System.out.println("End Japanese");
             }//end while


        }//end try
        catch (FileNotFoundException e)
        {
            e.printStackTrace();
        } catch (IOException e)
        {
            e.printStackTrace();
        } finally
        {
            try
            {
                if (reader2 != null)
                {
                    reader2.close();
                }
            } catch (IOException e)
            {
                e.printStackTrace();
            }

        }

//end training Japanese---------------------------------------------------------


} //end main (inner)
+1  A: 

Well thanks for that. Think its and input file problem so i have solved it now.

Aaa