I have been trying to implement an inverted index in Java for a few days, but I have not been able to get it working. The terms and term frequencies come out correctly, but I am unable to retrieve the document IDs. I do not understand how to use two TreeMaps, or how to nest one TreeMap inside another.
I am attaching the code here.
Code:
import java.util.*;
import java.io.*;
/**
 * Counts term frequencies for a fixed list of text documents and prints one
 * (term, frequency) table per document.
 *
 * <p>Each document is tokenized on the characters in {@link #DELIMITERS},
 * terms are lower-cased, and the per-document counts are kept in a
 * {@link TreeMap} so they print in sorted term order.
 */
public class invertindex {

    /** Documents processed by {@link #readFile(TreeMap)}; relative paths. */
    private static final String[] DOCS = {"words.txt", "words2.txt", "words3.txt", "words4.txt"};

    /** Characters that separate terms within a line: space, comma, period, hyphen. */
    private static final String DELIMITERS = " ,.-";

    /**
     * Program entry point: counts and prints the terms of every document in {@link #DOCS}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TreeMap<String, Integer> t1 = new TreeMap<String, Integer>();
        readFile(t1);
    }

    /**
     * Returns the count currently stored for a term.
     *
     * @param word the term to look up
     * @param t1   map from term to frequency
     * @return the stored frequency, or 0 when the term has no entry
     */
    public static int getWord(String word, TreeMap<String, Integer> t1) {
        // Single lookup instead of containsKey() followed by get().
        Integer count = t1.get(word);
        return count == null ? 0 : count;
    }

    /**
     * Processes every document in {@link #DOCS} in order: clears {@code t1},
     * fills it with that document's term frequencies, and prints the table.
     *
     * <p>If a document cannot be opened or read, the error is written to
     * {@code System.err} and the remaining documents are not processed.
     * On return {@code t1} holds the counts of the last document that was
     * (at least partially) read.
     *
     * @param t1 map that receives the term frequencies; its previous content is discarded
     */
    public static void readFile(TreeMap<String, Integer> t1) {
        try {
            for (int x = 0; x < DOCS.length; x++) {
                // Counts are per document, so start each one from an empty map.
                t1.clear();
                countTerms(new File(DOCS[x]), t1);
                print(t1);
            }
        } catch (IOException e) {
            System.err.println(e);
        }
    }

    /**
     * Adds the term frequencies of one document to {@code t1}.
     *
     * @param doc the file to read
     * @param t1  map from term to frequency, updated in place
     * @throws IOException if the file cannot be opened or read
     */
    private static void countTerms(File doc, TreeMap<String, Integer> t1) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(doc));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer stk = new StringTokenizer(line, DELIMITERS);
                while (stk.hasMoreTokens()) {
                    // Locale.ROOT keeps lower-casing independent of the default locale.
                    String word = stk.nextToken().toLowerCase(Locale.ROOT);
                    t1.put(word, getWord(word, t1) + 1);
                }
            }
        } finally {
            // Always release the file handle, even when reading fails part-way.
            br.close();
        }
    }

    /**
     * Prints a header followed by every (term, frequency) pair of {@code t1}
     * on a single line, in the map's key order, terminated by a newline.
     *
     * @param t1 map from term to frequency
     */
    public static void print(TreeMap<String, Integer> t1) {
        System.out.println("(Term, TermFrequency)");
        System.out.println("--------------------");
        for (Map.Entry<String, Integer> entry : t1.entrySet()) {
            System.out.printf("(%s,%d);", entry.getKey(), entry.getValue());
        }
        // End the entry line so the next table's header starts on its own line.
        System.out.println();
    }
}