Implementing inverted index in Java

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • 29294
    New Member
    • Aug 2013
    • 8

    Implementing inverted index in Java

    I have been trying to implement an inverted index in Java for a few days, but I have not been able to get it working. The terms and term frequencies come out correctly, but I am unable to retrieve the document IDs. I don't understand how to use two TreeMaps, or how to nest one TreeMap inside another.

    I am attaching the code here.
    Code:
    import java.util.*;  
    import java.io.*;  
    
    /**
     * Counts term frequencies for a fixed list of text documents and prints one
     * (term, frequency) listing per document to standard output.
     */
    public class invertindex{

        /** Documents processed by {@link #readFile}; relative names, so they are looked up in the working directory. */
        private static final String[] DOCS = {"words.txt", "words2.txt", "words3.txt", "words4.txt"};

        /** Characters that separate one token from the next within a line. */
        private static final String DELIMITERS = " ,.-";

        /**
         * Program entry point: builds an empty frequency map and runs {@link #readFile} on it.
         *
         * @param args command-line arguments (unused)
         */
        public static void main (String[] args)
        {
            TreeMap<String, Integer> t1 = new TreeMap<String, Integer>();
            readFile(t1);
        }

        /**
         * Looks up how many times a term has been counted so far.
         *
         * @param word the term to look up
         * @param t1   map from term to its current count
         * @return the stored count, or 0 when the term is absent or mapped to {@code null}
         */
        public static int getWord
                (String word, TreeMap<String, Integer> t1)
        {
            // Single lookup; the null check covers both "absent" and "present but null",
            // so unboxing can never throw.
            Integer count = t1.get(word);
            return (count == null) ? 0 : count;
        }

        /**
         * Reads each document in {@link #DOCS} in turn, tallies its lower-cased terms into
         * {@code t1}, and prints the tally via {@link #print}.
         *
         * <p>{@code t1} is cleared before every document, so on return it holds the counts of
         * the last document that was read. If a document cannot be opened or read, the error
         * is written to standard error and the remaining documents are skipped.
         *
         * @param t1 map that receives the term counts; it is emptied and refilled per document
         */
        public static void readFile(TreeMap<String, Integer> t1)
        {
            try
            {
                for (int x = 0; x < DOCS.length; x++)
                {
                    t1.clear();

                    BufferedReader br = new BufferedReader(new FileReader(new File(DOCS[x])));
                    try
                    {
                        String line;
                        while ((line = br.readLine()) != null)
                        {
                            StringTokenizer stk = new StringTokenizer(line, DELIMITERS);
                            while (stk.hasMoreTokens())
                            {
                                // Locale.ROOT keeps case folding independent of the machine's
                                // default locale (e.g. the Turkish dotless i).
                                String word = stk.nextToken().toLowerCase(Locale.ROOT);
                                t1.put(word, getWord(word, t1) + 1);
                            }
                        }
                    }
                    finally
                    {
                        // Release the file handle even when reading fails part-way through.
                        br.close();
                    }

                    print(t1);
                }
            }
            catch (IOException e)
            {
                System.err.println(e);
            }
        }

        /**
         * Prints a header followed by every {@code (term,frequency);} pair on one line, in the
         * map's key order, and ends that line so consecutive listings do not run together.
         *
         * @param t1 map from term to frequency
         */
        public static void print(TreeMap<String, Integer> t1)
        {
            System.out.println("(Term, TermFrequency)");
            System.out.println("--------------------");

            for (Map.Entry<String, Integer> entry : t1.entrySet())
            {
                System.out.printf("(%s,%d);", entry.getKey(), entry.getValue());
            }
            System.out.println();
        }
    }
Working...