Help with cookies/authentication

  • trihaitran
    New Member
    • Feb 2008
    • 7

    Help with cookies/authentication

    Hi, I am trying to pull some data from a Web site: http://schoolfinder.com

    The issue is that I want to use the advanced search feature, which requires logging into the Web site. I have a username and password, but I want to connect programmatically from Python. I have done data capture from the Web before, so the only new thing to me here is the authentication. I need cookies, as this page describes: http://schoolfinder.com/login/login.asp

    I already know how to add POST/GET data to a request, but how do I deal with cookies/authentication? I have read a few articles without success:

    • urllib2
    • the urllib2 Cookbook
    • basic authentication
    • cookielib
    Is there some other resource I am missing? Could someone set up a basic script that would let me connect to schoolfinder.com with my username and password? My username is "greenman", password is "greenman". All I need to know is how to access pages as if I had logged in with a Web browser.

    Thank you very much.
  • Formula
    New Member
    • Aug 2008
    • 11

    #2
    Try this code. It will log in to schoolfinder.com and register all the cookies it receives in a file.

    Code:
    #!/usr/local/bin/python

    COOKIEFILE = 'cookies.lwp'      # the path and filename that you want to use to save your cookies in

    import os.path
    import sys

    cj = None
    ClientCookie = None
    cookielib = None

    try:                            # let's see if cookielib is available
        import cookielib
    except ImportError:
        pass
    else:
        import urllib2
        urlopen = urllib2.urlopen
        cj = cookielib.LWPCookieJar()   # a subclass of FileCookieJar that has useful load and save methods
        Request = urllib2.Request

    if not cookielib:               # if importing cookielib fails, let's try ClientCookie
        try:
            import ClientCookie
        except ImportError:
            import urllib2
            urlopen = urllib2.urlopen
            Request = urllib2.Request
        else:
            urlopen = ClientCookie.urlopen
            cj = ClientCookie.LWPCookieJar()
            Request = ClientCookie.Request

    ####################################################
    # We've now imported the relevant library - whichever library is being
    # used, urlopen is bound to the right function for retrieving URLs and
    # Request is bound to the right class for creating Request objects.
    # Let's load the cookies, if they exist.

    if cj is not None:
        # Install our CookieJar so that it is used as the default
        # CookieProcessor in the default opener handler.
        if os.path.isfile(COOKIEFILE):
            cj.load(COOKIEFILE)
        if cookielib:
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            urllib2.install_opener(opener)
        else:
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # If one of the cookie libraries is available, any call to urlopen will
    # now handle cookies using the CookieJar instance we've created.
    # (Note that if we are using ClientCookie we haven't explicitly imported urllib2.)
    # As an example:

    theurl = 'http://schoolfinder.com/login/login.asp'  # an example url that sets a cookie - try different urls here and see the cookie collection you can make!
    body = {'usr': 'greenman', 'pwd': 'greenman'}

    from urllib import urlencode

    txdata = urlencode(body)    # for a POST request, encode the dictionary of form values with urllib.urlencode
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}    # fake a user agent; some websites (like google) don't like automated exploration

    try:
        req = Request(theurl, txdata, txheaders)    # create a request object
        handle = urlopen(req)                       # and open it to return a handle on the url
    except IOError, e:
        print 'We failed to open "%s".' % theurl
        if hasattr(e, 'code'):
            print 'We failed with error code - %s.' % e.code
        elif hasattr(e, 'reason'):
            print "The error object has the following 'reason' attribute :", e.reason
            print "This usually means the server doesn't exist, is down, or we don't have an internet connection."
            sys.exit()
    else:
        print 'Here are the headers of the page :'
        print handle.info()     # handle.read() returns the page; handle.geturl() returns the true url of the page fetched (in case urlopen followed any redirects)

    print
    if cj is None:
        print "We don't have a cookie library available - sorry."
        print "I can't show you any cookies."
    else:
        print 'These are the cookies we have received so far :'
        for index, cookie in enumerate(cj):
            print index, '  :  ', cookie
        cj.save(COOKIEFILE)     # save the cookies again
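
    Once the script above has run, the jar is saved in cookies.lwp, so a later run can restore the session without posting the login form again. A minimal sketch of that second run (assuming cookielib is available; the advanced-search URL here is made up for illustration, not taken from the site):

    Code:
    #!/usr/local/bin/python
    # Sketch: reload the cookie file saved by the script above and fetch a
    # protected page without logging in again.
    import os.path
    import cookielib
    import urllib2

    COOKIEFILE = 'cookies.lwp'
    cj = cookielib.LWPCookieJar()
    if os.path.isfile(COOKIEFILE):
        cj.load(COOKIEFILE)     # restore the cookies from the earlier session

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)

    # 'advsearch.asp' is a hypothetical protected page, used only as an example
    handle = urllib2.urlopen('http://schoolfinder.com/search/advsearch.asp')
    print handle.info()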


    • trihaitran
      New Member
      • Feb 2008
      • 7

      #3
      Thanks for the help. Your code by itself did not work, but it pushed me in the right direction. Here is what worked for me and let me see the protected pages:

      Code:
      #!/usr/bin/env python
      # -*- coding: UTF-8 -*-

      import cookielib
      import sys
      import urllib
      import urllib2

      cj = cookielib.CookieJar()
      opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
      resp = opener.open('http://schoolfinder.com')  # visit the site first so it can set its session cookie

      theurl = 'http://schoolfinder.com/login/login.asp'  # the url that the login form posts to
      body = {'usr': 'greenman', 'pwd': 'greenman'}
      txdata = urllib.urlencode(body)  # encode the form fields for a POST request
      txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}  # fake a user agent; some websites don't like automated exploration

      try:
          req = urllib2.Request(theurl, txdata, txheaders)  # create a request object
          handle = opener.open(req)                         # and open it to return a handle on the url
          HTMLSource = handle.read()
          f = open('test.html', 'w')
          f.write(HTMLSource)
          f.close()

      except IOError, e:
          print 'We failed to open "%s".' % theurl
          if hasattr(e, 'code'):
              print 'We failed with error code - %s.' % e.code
          elif hasattr(e, 'reason'):
              print "The error object has the following 'reason' attribute :", e.reason
              print "This usually means the server doesn't exist, is down, or we don't have an internet connection."
              sys.exit()

      else:
          print 'Here are the headers of the page :'
          print handle.info()  # handle.read() returns the page; handle.geturl() returns the true url (in case urlopen followed redirects)
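
      For what it's worth, the initial opener.open('http://schoolfinder.com') presumably matters because the site hands out a session cookie before you log in, and that cookie has to accompany the login POST. You can watch the jar fill up after each request with a small self-contained sketch like this:

      Code:
      #!/usr/bin/env python
      # Sketch: print the cookie jar after the pre-login GET to confirm
      # that the site really does set a session cookie before login.
      import cookielib
      import urllib2

      cj = cookielib.CookieJar()
      opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
      opener.open('http://schoolfinder.com')  # pre-login GET

      print 'Cookies after the first GET:'
      for index, cookie in enumerate(cj):
          print index, ':', cookie.name, '=', cookie.value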


      • johnpollard
        New Member
        • Oct 2008
        • 2

        #4
        Your script works for me, but the one below for another site does not. The test.html file is not my logged-in page, as it is when I run your script.

        The only lines of code I changed are:
        resp = opener.open('http://www.amm.com/')
        theurl = 'http://www.amm.com/login.asp'
        body={'username':'AMMT54590570','password':'AMMT32564288'}

        What am I doing wrong?

        -----------------------------------
        Code:
        #!/usr/bin/env python
        # -*- coding: UTF-8 -*-

        import cookielib
        import sys
        import urllib
        import urllib2

        cj = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        resp = opener.open('http://www.amm.com/login.asp')  # visit the site first so it can set its session cookie

        theurl = 'http://www.amm.com/login.asp'  # the url that the login form posts to
        body = {'username': 'AMMT54590570', 'password': 'AMMT32564288'}
        txdata = urllib.urlencode(body)  # encode the form fields for a POST request
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}  # fake a user agent

        try:
            req = urllib2.Request(theurl, txdata, txheaders)  # create a request object
            handle = opener.open(req)                         # and open it to return a handle on the url
            HTMLSource = handle.read()
            f = open('test.html', 'w')
            f.write(HTMLSource)
            f.close()

        except IOError, e:
            print 'We failed to open "%s".' % theurl
            if hasattr(e, 'code'):
                print 'We failed with error code - %s.' % e.code
            elif hasattr(e, 'reason'):
                print "The error object has the following 'reason' attribute :", e.reason
                print "This usually means the server doesn't exist, is down, or we don't have an internet connection."
                sys.exit()

        else:
            print 'Here are the headers of the page :'
            print handle.info()  # handle.read() returns the page; handle.geturl() returns the true url (in case urlopen followed redirects)
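
        One guess, since nothing here confirms it: the amm.com login form may not use username/password as its field names, may post to a different action URL than login.asp, or may include hidden fields (such as a session token) that have to be echoed back in the POST. A quick way to check is to dump the form and input tags from the login page; a sketch using only the standard library:

        Code:
        #!/usr/bin/env python
        # Sketch: print the <form> and <input> tags from the login page so
        # the action URL and real field names can be compared against the
        # POST body the script is sending.
        import re
        import urllib2

        html = urllib2.urlopen('http://www.amm.com/login.asp').read()
        for tag in re.findall(r'<(?:form|input)[^>]*>', html, re.IGNORECASE):
            print tag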
