valid point...!!
here's the test python.. ugly as it is!!
#!/usr/bin/python
#
# test.py
#
# scrapes/extracts the basic data for the college
#
#
# the app gets/stores
# name
# url
# address (street/city/state)
# phone
#
############### ############### ############### ############### ##########3
#test python script
import re
import libxml2dom
import urllib
import urllib2
import sys, string
from mechanize import Browser
import mechanize
#import tidy
import os.path
import cookielib
from libxml2dom import Node
from libxml2dom import NodeList
import subprocess
import MySQLdb
#import mysql_config
import time
########################
#
# Fetch the page at `url` with mechanize, parse it with libxml2dom and
# print the `action` attribute of the form named 'cos_search1'.
#
# NOTE: the previous banner read "Parse pricegrabber.com"; nothing in
# this script touches that site.
########################
urlopen = urllib2.urlopen
##cj = urllib2.cookielib.LWPCookieJar()
Request = urllib2.Request
br = Browser()
br2 = Browser()
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values1 = {'name' : 'Michael Foord',
           'location' : 'Northampton',
           'language' : 'Python' }
headers = { 'User-Agent' : user_agent }

# Two candidate targets; the second assignment wins, so the script
# fetches the psu.edu page.
url = "http://schedule.berkeley.edu/"
url = "http://schedule.psu.edu/"

#========================================
if __name__ == "__main__":
    # main app
    txdata = None

    #----------------------------
    # configure the browser: handle redirects, handle the Referer header,
    # do not apply robots.txt handling, and send a fixed User-Agent
    #br.set_cookiejar(cj)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.addheaders = [('User-Agent', 'Firefox')]

    # murl is the master url that gets fetched
    murl = url
    print("url = %s" % murl)
    br.open(murl)
    #cj.save(COOKIEFILE) # resave cookies

    res = br.response()  # this is a copy of response
    s = res.read()

    # s contains HTML not XML text, hence html=1
    d = libxml2dom.parseString(s, html=1)

    # XPath for the search form's action attribute.
    # Earlier experiments, kept for reference:
    #   "//div[@id='main_content']/form[1]/input[position()=1]/@name"
    #   "//img/parent::*/attribute::href"
    q = "//form[@name='cos_search1']/@action"
    t1 = d.xpath(q)
    print("href =  %s" % (t1,))

    # xpath() returns a list of matches; stop with a message instead of
    # an IndexError on t1[0] when the form is not on the page.
    if not t1:
        sys.exit("no node matched xpath: %s" % q)

    # Show the matched attribute node three ways for comparison.
    node = t1[0]
    print("hnode = %s" % node.nodeValue)
    print("htest = %s" % node.textContent)
    print("htesttt = %s" % node.toString())
    sys.exit()
thanks!!
-----Original Message-----
From: python-list-bounces+bedouglas=earthlink.net@python.org
[mailto:python-list-bounces+bedouglas=earthlink.net@python.org] On Behalf
Of Fredrik Lundh
Sent: Saturday, August 23, 2008 5:58 AM
To: python-list@python.org
Subject: Re: xpath questions...
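bruce wrote:
> Regarding the xpath question I've posed, some have said that it shouldn't be
> here on the mailing list. Given that I'm writing the test scripts/apps in
> python, using the python libs, where else should it be posted?
>
> I mean, I could post the entire sample script so you can see that it's using
> python, but I simplified the issue.

there was zero Python content left after the simplification. maybe you
should at least mention what library you're using to "play around with
xpath and the html dom" ?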
</F>
--
here's the test python.. ugly as it is!!
#!/usr/bin/python
#
# test.py
#
# scrapes/extracts the basic data for the college
#
#
# the app gets/stores
# name
# url
# address (street/city/state)
# phone
#
############### ############### ############### ############### ##########3
#test python script
import re
import libxml2dom
import urllib
import urllib2
import sys, string
from mechanize import Browser
import mechanize
#import tidy
import os.path
import cookielib
from libxml2dom import Node
from libxml2dom import NodeList
import subprocess
import MySQLdb
#import mysql_config
import time
########################
#
# Fetch the page at `url` with mechanize, parse it with libxml2dom and
# print the `action` attribute of the form named 'cos_search1'.
#
# NOTE: the previous banner read "Parse pricegrabber.com"; nothing in
# this script touches that site.
########################
urlopen = urllib2.urlopen
##cj = urllib2.cookielib.LWPCookieJar()
Request = urllib2.Request
br = Browser()
br2 = Browser()
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values1 = {'name' : 'Michael Foord',
           'location' : 'Northampton',
           'language' : 'Python' }
headers = { 'User-Agent' : user_agent }

# Two candidate targets; the second assignment wins, so the script
# fetches the psu.edu page.
url = "http://schedule.berkeley.edu/"
url = "http://schedule.psu.edu/"

#========================================
if __name__ == "__main__":
    # main app
    txdata = None

    #----------------------------
    # configure the browser: handle redirects, handle the Referer header,
    # do not apply robots.txt handling, and send a fixed User-Agent
    #br.set_cookiejar(cj)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.addheaders = [('User-Agent', 'Firefox')]

    # murl is the master url that gets fetched
    murl = url
    print("url = %s" % murl)
    br.open(murl)
    #cj.save(COOKIEFILE) # resave cookies

    res = br.response()  # this is a copy of response
    s = res.read()

    # s contains HTML not XML text, hence html=1
    d = libxml2dom.parseString(s, html=1)

    # XPath for the search form's action attribute.
    # Earlier experiments, kept for reference:
    #   "//div[@id='main_content']/form[1]/input[position()=1]/@name"
    #   "//img/parent::*/attribute::href"
    q = "//form[@name='cos_search1']/@action"
    t1 = d.xpath(q)
    print("href =  %s" % (t1,))

    # xpath() returns a list of matches; stop with a message instead of
    # an IndexError on t1[0] when the form is not on the page.
    if not t1:
        sys.exit("no node matched xpath: %s" % q)

    # Show the matched attribute node three ways for comparison.
    node = t1[0]
    print("hnode = %s" % node.nodeValue)
    print("htest = %s" % node.textContent)
    print("htesttt = %s" % node.toString())
    sys.exit()
thanks!!
-----Original Message-----
From: python-list-bounces+bedouglas=earthlink.net@python.org
[mailto:python-list-bounces+bedouglas=earthlink.net@python.org] On Behalf
Of Fredrik Lundh
Sent: Saturday, August 23, 2008 5:58 AM
To: python-list@python.org
Subject: Re: xpath questions...
bruce wrote:
> Regarding the xpath question I've posed, some have said that it shouldn't be
> here on the mailing list. Given that I'm writing the test scripts/apps in
> python, using the python libs, where else should it be posted?
>
> I mean, I could post the entire sample script so you can see that it's using
> python, but I simplified the issue.

there was zero Python content left after the simplification. maybe you
should at least mention what library you're using to "play around with
xpath and the html dom" ?
</F>
--