email parsing

Collapse
This topic is closed.
X
X
 
  • Time
  • Show
Clear All
new posts
  • ra9ftm

    email parsing

    This is my first Python script. I don't know whether it uses the
    modules correctly, but it works fine, especially with Russian code pages
    and MIME-formatted messages, including quoted-printable and base64
    encoded ones.

    It would be great if anybody could post comments on this script: is
    it good or bad?


    import email
    import mailbox
    from email.Header import decode_header
    from email.Header import make_header
    import string
    import sys

    # Output encoding: text printed by this script is re-encoded to this
    # codec before being written out.
    outEnc="cp866"
    # Path of the mailbox file to process, taken from the first command-line
    # argument (raises IndexError if the script is run without one).
    infile=sys.argv[1]

    # Footer markers: the message body is cut at the last occurrence of the
    # first marker (in list order) that it contains.
    #
    # NOTE(review): the posted source had spaces inside the tilde runs, which
    # look like artifacts of the forum breaking long tokens every 15 characters
    # (the same thing split ``append`` into ``app end``).  The runs are
    # reconstructed here as unbroken separators of 90, 31 and 18 tildes --
    # confirm the exact lengths against a real message footer.
    subStrObrez = [
        "~" * 90,
        "~" * 31 + "\nTo UNSUBSCRIBE from this forum, send an email to:",
        "~" * 18,
    ]

    # Cut yahoo info at the end of message
    def obrez(strMsg, markers=None):
        """Return *strMsg* with the trailing mailing-list footer removed.

        The markers are tried in list order.  For the first marker that occurs
        in *strMsg*, everything from its LAST occurrence onwards is dropped and
        the remainder is returned.  If no marker occurs, *strMsg* is returned
        unchanged.

        Args:
            strMsg: The message body to trim.
            markers: Optional sequence of footer marker strings.  Defaults to
                the module-level ``subStrObrez`` list.
        """
        if markers is None:
            markers = subStrObrez
        for marker in markers:
            cut = strMsg.rfind(marker)
            if cut != -1:
                return strMsg[:cut]
        return strMsg

    # Convert message header
    def my_get_header(str):
        """Decode a raw header value (possibly RFC 2047 encoded) to text.

        The header is split with ``decode_header``; every decoded piece is
        followed by a single space, so a non-empty result always ends with a
        space.

        Args:
            str: Raw header value, or None when the message lacks the header.
                (The name shadows the builtin; it is kept so existing callers
                keep working.)

        Returns:
            The decoded header text, or "" when *str* is None.
        """
        if str is None:
            # Looking up a header that is absent yields None, and
            # decode_header() cannot handle that.
            return ""
        pieces = []
        for val, encoding in decode_header(str):
            if isinstance(val, bytes):
                try:
                    # "replace" keeps one undecodable byte from aborting the
                    # whole run.
                    val = val.decode(encoding or "ascii", "replace")
                except LookupError:
                    # Charset name not known to Python: salvage the ASCII part.
                    val = val.decode("ascii", "replace")
            pieces.append(val + " ")
        return "".join(pieces)

    # Process the message
    def proc(msg):
        """Print the From/To/Subject headers and the text body of *msg*.

        Headers are decoded with ``my_get_header`` and re-encoded to ``outEnc``.
        For a multipart message every ``text/plain`` part is printed with the
        mailing-list footer cut off by ``obrez``.  For a single-part message a
        ``text/plain`` body is printed with the footer cut, and a ``text/html``
        body is printed as is.  Other content types are skipped.

        Written for Python 2: the encoded byte strings are printed directly.

        Args:
            msg: An ``email`` message object.
        """
        for label, name in (('From : ', 'From'),
                            ('To : ', 'To'),
                            ('Subject: ', 'Subject')):
            # msg[name] is None when the header is absent; fall back to "".
            header = my_get_header(msg[name] or "")
            print(label + header.encode(outEnc, "replace"))
        print("")

        if msg.is_multipart():
            for part in msg.walk():
                if part.get_content_type() == "text/plain":
                    print(_proc_part_text(part, True))
        elif msg.get_content_type() == "text/plain":
            print(_proc_part_text(msg, True))
        elif msg.get_content_type() == "text/html":
            print(_proc_part_text(msg, False))


    def _proc_part_text(part, cut_footer):
        """Return the transfer-decoded payload of *part*, ready for printing.

        With a declared charset the payload is decoded to text, optionally
        trimmed with ``obrez``, and re-encoded to ``outEnc``.  Without one the
        raw payload is passed through (optionally trimmed).
        """
        payload = part.get_payload(None, True)
        charset = part.get_content_charset()
        if not charset:
            if cut_footer:
                return obrez(payload)
            return payload
        try:
            text = payload.decode(charset, "replace")
        except LookupError:
            # Charset name not known to Python: salvage the ASCII part.
            text = payload.decode("ascii", "replace")
        if cut_footer:
            # Trim on the decoded text so both call paths cut the same way.
            text = obrez(text)
        # "replace" keeps characters missing from outEnc from aborting the run.
        return text.encode(outEnc, "replace")


    ############### ############### ############### ############### ############### #########
    # The main program

    # Rubric table: [subject tag to look for, name used in the "SB" line].
    RubLst = [
        ["[contestru]", "FOTSTR"],
        ["[russiandx]", "FORUDX"],
    ]

    f = open(infile, "rb")
    try:
        for msg in mailbox.UnixMailbox(f, email.message_from_file):
            # Decode the subject once per message; msg['Subject'] is None when
            # the header is absent.
            subject = my_get_header(msg['Subject'] or "")
            for rub in RubLst:
                if rub[0] in subject:
                    print("SB " + rub[1] + "@FORUM < INET")
                    print(subject.encode(outEnc, "replace"))
                    print("")
                    proc(msg)
                    print("")
                    print("powered by Python")
                    print("/EX")
    finally:
        # Close the mailbox file even if a message fails to process.
        f.close()
Working...