slow import

**William Manley** · Sep 1 '07, 01:15 AM

It's because your code is executed everytime it is imported. enclosing it in a function would fix the problem.

so change

[CODE=python]
import urllib
import re
page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=& CIK=&type=10-Q&owner=include &count=100&acti on=getcurrent'
raw = []
for line in urllib.urlopen( page):
if '<td bgcolor="#E6E6E 6" valign="top" align="left"><a href="' in line:
raw.append(line )

codestring = ' '.join(raw)
pattern = re.compile('/\S+')
results = re.findall(patt ern, codestring)

pageroot = 'http://www.sec.gov'
count= len(results)

fn = open("c://Python25/tmp.txt", 'w')

line10q = []
number = 0
while number < count:
newpage = pageroot + results[number]
for line in urllib.urlopen( newpage):
if '<td nowrap="nowrap" ><a href="' in line:
line10q.append( line)
fn.write(line)
number += 1

fn.close()

line10qstring = ' '.join(line10q)
pattern2 = re.compile('="/\S+">')
results10q = re.findall(patt ern, line10qstring)

newstring = ' '.join(results1 0q)
pattern3 = re.compile('/\S+.htm')
linkresults = re.findall(patt ern3, newstring)

pattern4 = re.compile('/\S+.[a-z]{3}"')
linktest2 = ' '.join(linkresu lts)
link2 = re.findall(patt ern4, linktest2)

link2string = ' '.join(link2)
pattern5 = re.compile('/\S+.htm')
link4 = re.findall(patt ern5, link2string)
link4string = ' '.join(link4)

linkNumber = len(link4)

[/CODE]

to

[CODE=python]
import urlib
import re

def myfunc():
page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=& CIK=&type=10-Q&owner=include &count=100&acti on=getcurrent'
# rest of code....
[/CODE]

That way you just do:
[CODE=python]
import myscript
myscript.myfunc ()
[/code]
and your done!

slow import

slow import

Comment