reading xml data

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • heiro
    New Member
    • Jul 2007
    • 56

    reading xml data

    I have an XML file which looks like this:
    Code:
    <xml>
      <process name="proc1">
    	   <mkdir>directory</mkdir>
      	   <copyfile>src,dst</copyfile>
      </process>
      
      <process name="proc2">
      	 <copyfile>src,dst</copyfile>
      </process>
    </xml>
    My problem is: how can I get the child nodes of the process elements named "proc1" and "proc2"?
    I also need to get the values of these child nodes (for example, "src,dst" for <copyfile>).
    I'm using the xml.dom.minidom module.

    Im hoping for your response guys...
  • heiro
    New Member
    • Jul 2007
    • 56

    #2
    anyone knows???pls help...

    Comment

    • bvdet
      Recognized Expert Specialist
      • Oct 2006
      • 2851

      #3
      You will need to create a parser, something like this:[code=Python]from xml.dom.minidom import parse

      # Path of the XML document to read.
      fn = 'sample.xml'

      # Parse the file into a minidom document object.
      dom1 = parse(fn)
      # global variable required by handleData
      # (handleData keeps only nodes whose "name" attribute value is in this list)
      nameList = ["proc1", "proc2"]

      def getText(nodelist):
          """Return the concatenated text of the text nodes in *nodelist*.

          Only nodes that are themselves text nodes contribute; element
          nodes in the list are skipped and their children are not visited.
          An empty list yields an empty string.
          """
          return "".join(
              node.data for node in nodelist if node.nodeType == node.TEXT_NODE
          )

      def handleData(nodelist, *args):
          """Return the first element per tag for each selected node.

          A node is selected when the value of its "name" attribute appears
          in the module-level ``nameList``.  For every selected node, and for
          every tag name in *args*, the first element returned by
          ``getElementsByTagName`` is collected; tags with no match are
          skipped.  Results keep node order, then argument order.
          """
          found = []
          for node in nodelist:
              # getAttribute returns "" for a missing attribute, so elements
              # without a "name" are skipped instead of raising KeyError.
              if node.getAttribute("name") not in nameList:
                  continue
              for tag in args:
                  matches = node.getElementsByTagName(tag)
                  if matches:
                      found.append(matches[0])
          return found

      for item in dom1.getElement sByTagName("cop yfile"):
      print getText(item.ch ildNodes)

      process_element s = dom1.getElement sByTagName('pro cess')
      print process_element s

      print handleData(proc ess_elements, "mkdir", "copyfile")

      for item in handleData(proc ess_elements, "mkdir", "copyfile") :
      print getText(item.ch ildNodes)[/code]Contents of sample.xml:
      <xml>
      <process name="proc1">
      <mkdir>directory</mkdir>
      <copyfile>src,dst</copyfile>
      </process>

      <process name="proc2">
      <copyfile>src,dst</copyfile>
      </process>
      <process name="proc3">
      <mkdir>directory</mkdir>
      <copyfile>src,dst</copyfile>
      </process>
      <process name="proc4">
      <mkdir>directory</mkdir>
      <copyfile>src,dst</copyfile>
      </process>
      </xml>
      Output from above code:
      >>> src,dst
      src,dst
      src,dst
      src,dst
      [<DOM Element: process at 0xed2670>, <DOM Element: process at 0xed2f58>, <DOM Element: process at 0xedb4b8>, <DOM Element: process at 0xedb788>]
      [<DOM Element: mkdir at 0xed2e68>, <DOM Element: copyfile at 0xed2e90>, <DOM Element: copyfile at 0xedb0a8>]
      directory
      src,dst
      src,dst
      >>>

      Comment

      • heiro
        New Member
        • Jul 2007
        • 56

        #4
        Originally posted by bvdet
        You will need to create a parser, something like this:[code=Python]from xml.dom.minidom import parse

        # Path of the XML document to read.
        fn = 'sample.xml'

        # Parse the file into a minidom document object.
        dom1 = parse(fn)
        # global variable required by handleData
        # (handleData keeps only nodes whose "name" attribute value is in this list)
        nameList = ["proc1", "proc2"]

        def getText(nodelist):
            """Return the concatenated text of the text nodes in *nodelist*.

            Only nodes that are themselves text nodes contribute; element
            nodes in the list are skipped and their children are not visited.
            An empty list yields an empty string.
            """
            return "".join(
                node.data for node in nodelist if node.nodeType == node.TEXT_NODE
            )

        def handleData(nodelist, *args):
            """Return the first element per tag for each selected node.

            A node is selected when the value of its "name" attribute appears
            in the module-level ``nameList``.  For every selected node, and for
            every tag name in *args*, the first element returned by
            ``getElementsByTagName`` is collected; tags with no match are
            skipped.  Results keep node order, then argument order.
            """
            found = []
            for node in nodelist:
                # getAttribute returns "" for a missing attribute, so elements
                # without a "name" are skipped instead of raising KeyError.
                if node.getAttribute("name") not in nameList:
                    continue
                for tag in args:
                    matches = node.getElementsByTagName(tag)
                    if matches:
                        found.append(matches[0])
            return found

        for item in dom1.getElement sByTagName("cop yfile"):
        print getText(item.ch ildNodes)

        process_element s = dom1.getElement sByTagName('pro cess')
        print process_element s

        print handleData(proc ess_elements, "mkdir", "copyfile")

        for item in handleData(proc ess_elements, "mkdir", "copyfile") :
        print getText(item.ch ildNodes)[/code]Contents of sample.xml:
        <xml>
        <process name="proc1">
        <mkdir>directory</mkdir>
        <copyfile>src,dst</copyfile>
        </process>

        <process name="proc2">
        <copyfile>src,dst</copyfile>
        </process>
        <process name="proc3">
        <mkdir>directory</mkdir>
        <copyfile>src,dst</copyfile>
        </process>
        <process name="proc4">
        <mkdir>directory</mkdir>
        <copyfile>src,dst</copyfile>
        </process>
        </xml>
        Output from above code:
        >>> src,dst
        src,dst
        src,dst
        src,dst
        [<DOM Element: process at 0xed2670>, <DOM Element: process at 0xed2f58>, <DOM Element: process at 0xedb4b8>, <DOM Element: process at 0xedb788>]
        [<DOM Element: mkdir at 0xed2e68>, <DOM Element: copyfile at 0xed2e90>, <DOM Element: copyfile at 0xedb0a8>]
        directory
        src,dst
        src,dst
        >>>


        thanks bvdet....i'll try this one..thanks

        Comment

        • bvdet
          Recognized Expert Specialist
          • Oct 2006
          • 2851

          #5
          Originally posted by heiro
          thanks bvdet....i'll try this one..thanks
          You are welcome. I am learning about XML and DOM also.

          Comment

          • heiro
            New Member
            • Jul 2007
            • 56

            #6
            Originally posted by bvdet
            You are welcome. I am learning about XML and DOM also.
            I know this is too much :-).
            I want to ask another favor..What if i want the output should look like this:

            process name="proc1"
            mkdir: directory
            copyfile: src,dst


            process name="proc2"
            copyfile: src,dst


            process name="proc3"
            mkdir: directory
            copyfile: src,dst

            And how can I parse an XML child node which looks like this:
            <download ='ftp' user='username' password='password'>

            thanks in advance bvdet..Hope you can help me with this in a second time...

            Comment

            • bvdet
              Recognized Expert Specialist
              • Oct 2006
              • 2851

              #7
              Originally posted by heiro
              I know this is too much :-).
              I want to ask another favor..What if i want the output should look like this:

              process name="proc1"
              mkdir: directory
              copyfile: src,dst


              process name="proc2"
              copyfile: src,dst


              process name="proc3"
              mkdir: directory
              copyfile: src,dst

              And how can I parse an XML child node which looks like this:
              <download ='ftp' user='username' password='password'>

              thanks in advance bvdet..Hope you can help me with this in a second time...
              Create a function to format the data:[code=Python]from xml.dom.minidom import parse

              # global variables required by formatData
              # nameList: attribute values that select a node for output.
              nameList = ["proc1", "proc2"]
              # nodeIDlist: attribute names that formatData looks up on each node.
              nodeIDlist = ['name',]

              def formatData(nodelist, *args):
                  """Format the selected nodes and their child values as text.

                  For each node in *nodelist*, every attribute name in the
                  module-level ``nodeIDlist`` is checked.  When the node has that
                  attribute and its value is listed in the module-level
                  ``nameList``, a header line ``<tag> <attribute>=<value>`` is
                  emitted, followed by one `` <tag>: <text>`` line for each tag
                  name in *args* that the node contains (first match only).

                  Returns the lines joined with newlines; an empty string when
                  nothing matches.
                  """
                  lines = []
                  for node in nodelist:
                      for attr_name in nodeIDlist:
                          # Missing attributes are skipped silently, as before.
                          if not node.hasAttribute(attr_name):
                              continue
                          value = node.getAttribute(attr_name)
                          if value not in nameList:
                              continue
                          # Take the tag name from the node itself; the header
                          # names the attribute that actually matched.
                          lines.append('%s %s=%s' % (node.nodeName, attr_name, value))
                          for tag in args:
                              children = node.getElementsByTagName(tag)
                              if children:
                                  lines.append(' %s: %s' % (tag, getText(children[0].childNodes)))
                  return '\n'.join(lines)

              dom1 = parse('sample.xml')[/code][code=Python]>>> process_elements = dom1.getElementsByTagName('process')
              >>> process_elements
              [<DOM Element: process at 0xf8bb98>, <DOM Element: process at 0xf8b918>, <DOM Element: process at 0xf8b710>, <DOM Element: process at 0xf87490>]
              >>> print formatData(process_elements, "mkdir", "copyfile")
              process name=proc1
              mkdir: directory1
              copyfile: src1,dst1
              process name=proc2
              copyfile: src2,dst2
              >>> [/code]The string <download ='ftp' user='username' password='password'> does not appear to be valid XML. Shouldn't there be an attribute name to the left of the equals sign after 'download'?

              Comment

              • heiro
                New Member
                • Jul 2007
                • 56

                #8
                Originally posted by bvdet
                Create a function to format the data:[code=Python]from xml.dom.minidom import parse

                # global variables required by formatData
                # nameList: attribute values that select a node for output.
                nameList = ["proc1", "proc2"]
                # nodeIDlist: attribute names that formatData looks up on each node.
                nodeIDlist = ['name',]

                def formatData(nodelist, *args):
                    """Format the selected nodes and their child values as text.

                    For each node in *nodelist*, every attribute name in the
                    module-level ``nodeIDlist`` is checked.  When the node has that
                    attribute and its value is listed in the module-level
                    ``nameList``, a header line ``<tag> <attribute>=<value>`` is
                    emitted, followed by one `` <tag>: <text>`` line for each tag
                    name in *args* that the node contains (first match only).

                    Returns the lines joined with newlines; an empty string when
                    nothing matches.
                    """
                    lines = []
                    for node in nodelist:
                        for attr_name in nodeIDlist:
                            # Missing attributes are skipped silently, as before.
                            if not node.hasAttribute(attr_name):
                                continue
                            value = node.getAttribute(attr_name)
                            if value not in nameList:
                                continue
                            # Take the tag name from the node itself; the header
                            # names the attribute that actually matched.
                            lines.append('%s %s=%s' % (node.nodeName, attr_name, value))
                            for tag in args:
                                children = node.getElementsByTagName(tag)
                                if children:
                                    lines.append(' %s: %s' % (tag, getText(children[0].childNodes)))
                    return '\n'.join(lines)

                dom1 = parse('sample.xml')[/code][code=Python]>>> process_elements = dom1.getElementsByTagName('process')
                >>> process_elements
                [<DOM Element: process at 0xf8bb98>, <DOM Element: process at 0xf8b918>, <DOM Element: process at 0xf8b710>, <DOM Element: process at 0xf87490>]
                >>> print formatData(process_elements, "mkdir", "copyfile")
                process name=proc1
                mkdir: directory1
                copyfile: src1,dst1
                process name=proc2
                copyfile: src2,dst2
                >>> [/code]The string <download ='ftp' user='username' password='password'> does not appear to be valid XML. Shouldn't there be an attribute name to the left of the equals sign after 'download'?

                It actually looks like this:

                <process name='download'>
                <download server='ftp' user='username' password='******'>
                <destination>path</destination>
                <unzip>*.jpg, *.doc, *.pdf</unzip>
                </download>
                </process>

                Actually I'm making a program right now and its output depends on the xml.
                You help a me a lot bvdet..Thanks man

                Comment

                • bvdet
                  Recognized Expert Specialist
                  • Oct 2006
                  • 2851

                  #9
                  I have played around with XML parsing, and I made a new function. It is kind of ugly and does not work exactly the way I want, so maybe someone can improve it. Following is the complete code:[code=Python]from xml.dom.minidom import parse

                  def getText(nodelist):
                      """Return the text nodes of *nodelist* joined with newlines.

                      Text nodes consisting only of whitespace are dropped.  The
                      remaining nodes contribute their data unchanged, so any
                      surrounding whitespace is kept.  Non-text nodes are ignored,
                      and an empty list yields an empty string.
                      """
                      return '\n'.join(
                          node.data
                          for node in nodelist
                          if node.nodeType == node.TEXT_NODE and node.data.strip()
                      )

                  def nodeName(node):
                      """Return the tag name of an element node, or '' otherwise.

                      The name is read from the node's ``nodeName`` attribute
                      instead of being parsed out of ``repr(node)``.  Parsing the
                      repr gave wrong results for text containing ':' and for
                      prefixed tags such as ``ns:tag``.  Anything that is not an
                      element node, including ``None``, yields ''.
                      """
                      element_type = getattr(node, 'ELEMENT_NODE', None)
                      if element_type is not None and getattr(node, 'nodeType', None) == element_type:
                          return node.nodeName
                      return ''

                  def getDataList(nodelist, **kargs):
                      """Select element nodes by attribute value and print their children.

                      *kargs* maps a tag name to a dict of
                      ``{attribute name: allowed values}``.  For each element in
                      *nodelist* whose tag name is a key of *kargs*, every listed
                      attribute is checked:

                      * if the element lacks the attribute, a notice is printed;
                      * if the allowed-values collection is empty, or contains the
                        attribute's value, the string ``'<tag> <attr>=<value>'`` is
                        added to the result and the element's child elements are
                        printed as `` <child tag>: <text>`` lines.

                      Non-element nodes are skipped.  Returns the list of
                      ``'<tag> <attr>=<value>'`` strings in encounter order.
                      """
                      resList = []
                      for node in nodelist:
                          # Only elements carry attributes; skip everything else.
                          if node.nodeType != node.ELEMENT_NODE:
                              continue
                          node_name = nodeName(node)
                          if node_name not in kargs:
                              continue
                          for attr_name, allowed in kargs[node_name].items():
                              if not node.hasAttribute(attr_name):
                                  print('Invalid node atribute: %r' % attr_name)
                                  continue
                              value = node.getAttribute(attr_name)
                              # An empty collection of allowed values accepts any value.
                              if allowed and value not in allowed:
                                  continue
                              resList.append('%s %s=%s' % (node_name, attr_name, value))

                              print('DOM element = %s' % node.nodeName)
                              lines = []
                              for elem in node.childNodes:
                                  nm = nodeName(elem)
                                  label = nm + ': ' if nm else ''
                                  lines.append(' %s%s' % (label, getText(elem.childNodes)))
                              # Blank entries come from whitespace-only text children.
                              print('\n'.join([line for line in lines if line.strip()]))
                      return resList

                  fn = r'H:\TEMP\temsy s\sampleXML.txt '

                  dom1 = parse(fn)

                  process_element s = dom1.getElement sByTagName('pro cess')
                  download_elemen ts = dom1.getElement sByTagName('dow nload')

                  elemDict = {'process': {'name': ["proc1", "proc2"]}, 'download': {'server': ['ftp', ]}}
                  x = getDataList(pro cess_elements, **elemDict)
                  y = getDataList(dow nload_elements, **elemDict)

                  print
                  print x
                  print y[/code]Output:
                  >>> DOM element = process
                  mkdir: directory1
                  mkdir: directory11
                  mkdir: directory111
                  copyfile: src1,dst1
                  DOM element = process
                  copyfile: src2,dst2
                  DOM element = download
                  destination: path
                  unzip: *.jpg, *.doc, *.pdf

                  ['process name=proc1', 'process name=proc2']
                  ['download server=ftp']
                  >>>

                  Comment

                  Working...