I have an HTML document as follows:
<html>
<body>
<h1>world</h1>
<br>
List
</body>
</html>
I want the path of each node in its DOM tree representation.
I wrote the following code using the htmlparser library:
import org.htmlparser. Parser;
import org.htmlparser. Node;
import org.htmlparser. util.NodeList;
import org.htmlparser. util.NodeIterat or;
import org.htmlparser. util.ParserExce ption;
public class ParserMain
{
public static void main(String[] args)throws ParserException
{
String str=null, stri=null;
Parser parser = new Parser ("file:///E:/project/d2.html");
NodeList list = parser.parse (null);
Node[] node = new Node[500];
int j=0;
Node rootnode = list.elementAt (0);
str=rootnode.ge tText();
System.out.prin tln("root::"+st r);
NodeList sublist = rootnode.getChi ldren ();
System.out.prin tln("size is"+sublist.siz e());
NodeIterator i = sublist.element s();
while(i.hasMore Nodes())
{
j++;
node [j]= i.nextNode();
str=node[j].getText();
System.out.prin tln("nodes are::"+str);
}
}
}
I'm not getting the name of every node. I read that '\n' is also considered an element in the NodeList, but I can't resolve it.
I need the path of each node as a string as the output.
Can anyone help me?
<html>
<body>
<h1>world</h1>
<br>
List
</body>
</html>
I want the path of each node in its DOM tree representation.
I wrote the following code using the htmlparser library:
import org.htmlparser. Parser;
import org.htmlparser. Node;
import org.htmlparser. util.NodeList;
import org.htmlparser. util.NodeIterat or;
import org.htmlparser. util.ParserExce ption;
public class ParserMain
{
public static void main(String[] args)throws ParserException
{
String str=null, stri=null;
Parser parser = new Parser ("file:///E:/project/d2.html");
NodeList list = parser.parse (null);
Node[] node = new Node[500];
int j=0;
Node rootnode = list.elementAt (0);
str=rootnode.ge tText();
System.out.prin tln("root::"+st r);
NodeList sublist = rootnode.getChi ldren ();
System.out.prin tln("size is"+sublist.siz e());
NodeIterator i = sublist.element s();
while(i.hasMore Nodes())
{
j++;
node [j]= i.nextNode();
str=node[j].getText();
System.out.prin tln("nodes are::"+str);
}
}
}
I'm not getting the name of every node. I read that '\n' is also considered an element in the NodeList, but I can't resolve it.
I need the path of each node as a string as the output.
Can anyone help me?