any other best way of reading the file

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • psbasha
    Contributor
    • Feb 2007
    • 440

    any other best way of reading the file

    Hi,

    I am posting the sample code I use for reading the input data shown below.

    Is there a better way of reading the file?

    Thanks in advance
    PSB
    Code:
    Sample1.txt
    Rect    1       1       2       7       6
    Rect    2       2       3       8       7
    Rect    3       3       4       9       8
    Tria    4       4       5       9
    Pnt     1       0.      0.      0.
    Pnt     2       5.      0.      0.
    Pnt     3       10.     0.      0.
    Pnt     4       15.     0.      0.
    Pnt     5       20.     0.      0.
    Pnt     6       0.      5.      0.
    Pnt     7       5.      5.      0.
    Pnt     8       10.     5.      0.
    Pnt     9       15.     5.      0.
    Code:
    Sample.py
    def read_file_data(strFile):
        """Parse a fixed-width Pnt/Rect/Tria file into two dictionaries.

        Each record is read as 8-character columns: column 0 holds the
        keyword, column 1 the record ID and the following columns the data
        (three coordinates for 'Pnt', three point IDs for 'Tria', four point
        IDs for 'Rect').  Lines starting with anything else, including blank
        lines, are ignored.

        Parameters:
            strFile -- path of the text file to read.

        Returns:
            A tuple (pntIDDict, wireIDDict).  pntIDDict maps an int point ID
            to a list of three floats; wireIDDict maps an int wire ID to a
            list of three or four int point IDs.

        Raises:
            ValueError -- if a numeric column is empty or not a number.
        """
        pntIDDict = {}
        wireIDDict = {}

        def column(record, index):
            # 8-character column number `index` of the record (0 = keyword).
            return record[index * 8:(index + 1) * 8]

        # 'with' guarantees the file is closed even if a conversion raises.
        with open(strFile, 'r') as f:
            for strTemp in f:
                # Remove only the line terminator.  Chopping the last
                # character unconditionally would eat a data character when
                # the final line of the file has no trailing newline.
                strTemp = strTemp.rstrip('\r\n')
                keyword = strTemp[:4]

                if strTemp[:3] == 'Pnt':
                    pntIDDict[int(column(strTemp, 1))] = [
                        float(column(strTemp, i)) for i in (2, 3, 4)]
                elif keyword == 'Rect' or keyword == 'Tria':
                    # A rectangle has one more corner than a triangle.
                    dataColumns = (2, 3, 4, 5) if keyword == 'Rect' else (2, 3, 4)
                    wireIDDict[int(column(strTemp, 1))] = [
                        int(column(strTemp, i)) for i in dataColumns]

        return pntIDDict, wireIDDict
    
    if __name__ == '__main__':
        # Script entry point: parse the sample file and show both dictionaries.
        # The result is unpacked straight into correctly spelled names; no
        # pre-initialised placeholders are needed.
        pntIDDict, wireIDDict = read_file_data("c:\\Sample1.txt")
        # Formatting into one string produces the same "a b" output as the
        # Python 2 print statement and also runs under Python 3.
        print("%s %s" % (pntIDDict, wireIDDict))
  • bartonc
    Recognized Expert Expert
    • Sep 2006
    • 6478

    #2
    Originally posted by psbasha
    Hi,

    I am here with placing the Sample code for reading the and Input data mentioned.

    Is there any best of reading the file?.

    Thanks in advance
    PSB
    Code:
    Sampple1.txt
    Rect    1       1       2       7       6
    Rect    2       2       3       8       7
    Rect    3       3       4       9       8
    Tria    4       4       5       9
    Pnt     1       0.      0.      0.
    Pnt     2       5.      0.      0.
    Pnt     3       10.     0.      0.
    Pnt     4       15.     0.      0.
    Pnt     5       20.     0.      0.
    Pnt     6       0.      5.      0.
    Pnt     7       5.      5.      0.
    Pnt     8       10.     5.      0.
    Pnt     9       15.     5.      0.
    Code:
    Sample.py
    def read_file_data(strFile):
        """Parse a fixed-width Pnt/Rect/Tria file into two dictionaries.

        Each record is read as 8-character columns: column 0 holds the
        keyword, column 1 the record ID and the following columns the data
        (three coordinates for 'Pnt', three point IDs for 'Tria', four point
        IDs for 'Rect').  Lines starting with anything else, including blank
        lines, are ignored.

        Parameters:
            strFile -- path of the text file to read.

        Returns:
            A tuple (pntIDDict, wireIDDict).  pntIDDict maps an int point ID
            to a list of three floats; wireIDDict maps an int wire ID to a
            list of three or four int point IDs.

        Raises:
            ValueError -- if a numeric column is empty or not a number.
        """
        pntIDDict = {}
        wireIDDict = {}

        def column(record, index):
            # 8-character column number `index` of the record (0 = keyword).
            return record[index * 8:(index + 1) * 8]

        # 'with' guarantees the file is closed even if a conversion raises.
        with open(strFile, 'r') as f:
            for strTemp in f:
                # Remove only the line terminator.  Chopping the last
                # character unconditionally would eat a data character when
                # the final line of the file has no trailing newline.
                strTemp = strTemp.rstrip('\r\n')
                keyword = strTemp[:4]

                if strTemp[:3] == 'Pnt':
                    pntIDDict[int(column(strTemp, 1))] = [
                        float(column(strTemp, i)) for i in (2, 3, 4)]
                elif keyword == 'Rect' or keyword == 'Tria':
                    # A rectangle has one more corner than a triangle.
                    dataColumns = (2, 3, 4, 5) if keyword == 'Rect' else (2, 3, 4)
                    wireIDDict[int(column(strTemp, 1))] = [
                        int(column(strTemp, i)) for i in dataColumns]

        return pntIDDict, wireIDDict
    
    if __name__ == '__main__':
        # Script entry point: parse the sample file and show both dictionaries.
        # The result is unpacked straight into correctly spelled names; no
        # pre-initialised placeholders are needed.
        pntIDDict, wireIDDict = read_file_data("c:\\Sample1.txt")
        # Formatting into one string produces the same "a b" output as the
        # Python 2 print statement and also runs under Python 3.
        print("%s %s" % (pntIDDict, wireIDDict))
    It is easiest to iterate over a file with a 'for ... in' loop.
    To cope with a varying number of spaces, or with tabs, it is safer to split each line into a list of fields.
    This technique is more likely to raise an IndexError, but also more likely to read the values out of the file:
    Code:
    def read_file_data(strFile):
        """Parse a whitespace-delimited Pnt/Rect/Tria file into two dictionaries.

        Every line is split on runs of whitespace (spaces or tabs).  The first
        field is the keyword, the second the record ID and the rest the data:
        three coordinates for 'Pnt', three point IDs for 'Tria' and four point
        IDs for 'Rect'.  Blank lines and lines with any other keyword are
        skipped.

        Parameters:
            strFile -- path of the text file to read.

        Returns:
            A tuple (pntIDDict, wireIDDict).  pntIDDict maps an int point ID
            to a list of three floats; wireIDDict maps an int wire ID to a
            list of three or four int point IDs.

        Raises:
            IndexError -- if a recognised record has too few fields.
            ValueError -- if a field that must be numeric is not a number.
        """
        pntIDDict = {}
        wireIDDict = {}

        # 'with' guarantees the file is closed even if a conversion raises.
        with open(strFile, 'r') as f:
            for strTemp in f:
                tmpList = strTemp.split()
                if not tmpList:
                    # Blank line: there is no keyword to look at.
                    continue

                keyword = tmpList[0]
                if keyword == 'Pnt':
                    pntIDDict[int(tmpList[1])] = [
                        float(tmpList[i]) for i in (2, 3, 4)]
                elif keyword == 'Rect' or keyword == 'Tria':
                    # Decide on the split keyword, not on the raw line, so a
                    # Rect record with leading whitespace keeps its 4th point.
                    dataFields = (2, 3, 4, 5) if keyword == 'Rect' else (2, 3, 4)
                    wireIDDict[int(tmpList[1])] = [
                        int(tmpList[i]) for i in dataFields]

        return pntIDDict, wireIDDict
    
    if __name__ == '__main__':
        # Script entry point: parse the sample file and show both dictionaries.
        # The result is unpacked straight into correctly spelled names; no
        # pre-initialised placeholders are needed.
        pntIDDict, wireIDDict = read_file_data("text1.txt")
        # Formatting into one string produces the same "a b" output as the
        # Python 2 print statement and also runs under Python 3.
        print("%s %s" % (pntIDDict, wireIDDict))
    You should also guard against the ValueError that float() and int() raise on bad input by wrapping the conversions in try blocks.

    Comment

    • psbasha
      Contributor
      • Feb 2007
      • 440

      #3
      Thanks for the suggestion.

      Where should I place the "try" and "except" blocks?

      -PSB

      Comment

      • psbasha
        Contributor
        • Feb 2007
        • 440

        #4
        BV, any comments from your side on reading the above input file data? Is it possible to reduce the number of lines of code and read the data in a more precise way?

        Thanks
        PSB

        Comment

        • bartonc
          Recognized Expert Expert
          • Sep 2006
          • 6478

          #5
          Originally posted by psbasha
          Thanks for the suggestion.

          Which place I have to place the "try" and "catch" blocks

          -PSB
          If you don't care which field is wrong (just want to handle errors gracefully) then wrap all 3 (or 4) conversions in a single block:
          Code:
          >>> float('abc')
            File "<console>", line 1, in ?
          ''' exceptions.ValueError : invalid literal for float(): abc '''
          
          >>> try:
          ...     float('abc')
          ... except ValueError: # Try not to use 'naked' excepts EVER
          ...     print "not a float"
          ...     
          not a float
          >>>
          The same goes for the ints.

          Comment

          • bvdet
            Recognized Expert Specialist
            • Oct 2006
            • 2851

            #6
            Originally posted by psbasha
            BV ,any comments from your side on reading the above input file data.Is it psosible to reduce any lines of code and make the reading data in more precise way.

            Thanks
            PSB
            Shameless! You know I'm a sucker for file parsing problems :)

            Use the convert data function I showed you. Initialize your dictionaries. Read all the lines from the file into a list; the file object is never bound to a name. Iterate over the list. Create a word list from each line with a list comprehension using strip() and split(' '), skipping the blank strings. Check for keywords in the word list to decide which dictionary to add to, and build the values with another list comprehension. If the data conversion fails, you have a string.
            Code:
            def read_file_data(f):
                """Parse a whitespace-delimited Pnt/Rect/Tria file into two dictionaries.

                Each line is split on whitespace and lower-cased.  A line
                containing 'rect' or 'tria' goes into the wire dictionary, a
                line containing 'pnt' into the point dictionary; the second
                field becomes the key and the remaining fields the value list.
                All other lines, including blank ones, are ignored.

                Parameters:
                    f -- path of the text file to read.

                Returns:
                    A tuple (ptDict, wireDict).  Keys and list items are
                    whatever convert_data() returns for each field.

                Raises:
                    IndexError -- if a recognised line has no second field.
                """
                ptDict = {}
                wireDict = {}
                # 'with' closes the file deterministically instead of leaving
                # the unnamed file object to the garbage collector.
                with open(f) as dataFile:
                    for line in dataFile:
                        # split() with no argument breaks on any run of
                        # whitespace, so tab-separated fields work as well
                        # and no empty strings need filtering out.
                        lineList = [x.lower() for x in line.split()]
                        if 'rect' in lineList or 'tria' in lineList:
                            wireDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]
                        elif 'pnt' in lineList:
                            ptDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]
                return ptDict, wireDict

            Comment

            • psbasha
              Contributor
              • Feb 2007
              • 440

              #7
              Originally posted by bvdet
              Shameless! You know I'm a sucker for file parsing problems :)

              Use the convert data function I showed you. Initialize your dictionaries. Read all the lines from the file into a list. No file object is created. Iterate on the list. Create a word list from the line with a list comprehension using strip() and split(' '), skipping the blank strings. Check for keywords in the word list to decide which dictionary to add to using another list comprehension. If the data conversion fails, you have a string.
              Code:
              def read_file_data(f):
                  """Parse a whitespace-delimited Pnt/Rect/Tria file into two dictionaries.

                  Each line is split on whitespace and lower-cased.  A line
                  containing 'rect' or 'tria' goes into the wire dictionary, a
                  line containing 'pnt' into the point dictionary; the second
                  field becomes the key and the remaining fields the value
                  list.  All other lines, including blank ones, are ignored.

                  Parameters:
                      f -- path of the text file to read.

                  Returns:
                      A tuple (ptDict, wireDict).  Keys and list items are
                      whatever convert_data() returns for each field.

                  Raises:
                      IndexError -- if a recognised line has no second field.
                  """
                  ptDict = {}
                  wireDict = {}
                  # 'with' closes the file deterministically instead of
                  # leaving the unnamed file object to the garbage collector.
                  with open(f) as dataFile:
                      for line in dataFile:
                          # split() with no argument breaks on any run of
                          # whitespace, so tab-separated fields work as well
                          # and no empty strings need filtering out.
                          lineList = [x.lower() for x in line.split()]
                          if 'rect' in lineList or 'tria' in lineList:
                              wireDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]
                          elif 'pnt' in lineList:
                              ptDict[convert_data(lineList[1])] = [convert_data(x) for x in lineList[2:]]
                  return ptDict, wireDict
              BV,

              If the Point and Wire IDs are 8-digit numbers, I am not able to get the details with the above piece of code, since there are no spaces between the data fields.

              How to resolve this issue?

              Code:
              Sample.txt
              Rect    1000000010000000200000007000000060000000
              Rect    2000000020000000300000008000000070000000
              Rect    3000000030000000400000009000000080000000
              Tria     40000000400000005000000090000000
              Pnt     100000000.      0.      0.
              Pnt     200000005.      0.      0.
              Pnt     3000000010.     0.      0.
              Pnt     4000000015.     0.      0.
              Pnt     5000000020.     0.      0.
              Pnt     600000000.      5.      0.
              Pnt     700000005.      5.      0.
              Pnt     8000000010.     5.      0.
              Pnt     9000000015.     5.      0.
              Thanks in advance
              PSB

              Comment

              • bvdet
                Recognized Expert Specialist
                • Oct 2006
                • 2851

                #8
                Originally posted by psbasha
                BV,

                If the Point and Wire IDs are having 8-digit number then I am not able to get the details from the above piece of code,sicne we are not having the spaces in between the data.

                How to resolve this issue?

                Code:
                Sample.txt
                Rect    1000000010000000200000007000000060000000
                Rect    2000000020000000300000008000000070000000
                Rect    3000000030000000400000009000000080000000
                Tria     40000000400000005000000090000000
                Pnt     100000000.      0.      0.
                Pnt     200000005.      0.      0.
                Pnt     3000000010.     0.      0.
                Pnt     4000000015.     0.      0.
                Pnt     5000000020.     0.      0.
                Pnt     600000000.      5.      0.
                Pnt     700000005.      5.      0.
                Pnt     8000000010.     5.      0.
                Pnt     9000000015.     5.      0.
                Thanks in advance
                PSB
                Code:
                import re
                >>> lineList = [x.lower() for x in re.split('[ 0]', line.strip()) if x != '']
                >>> lineList
                ['rect', '1', '1', '2', '7', '6']
                >>>

                Comment

                • psbasha
                  Contributor
                  • Feb 2007
                  • 440

                  #9
                  Originally posted by bvdet
                  Code:
                  import re
                  >>> lineList = [x.lower() for x in re.split('[ 0]', line.strip()) if x != '']
                  >>> lineList
                  ['rect', '1', '1', '2', '7', '6']
                  >>>
                  Sorry BV, the remaining digits will not always be zero. Each value is a number of up to 8 digits, with a maximum value of 99999999.
                  Code:
                  Sample.txt
                  Rect    1000007110000101200000227000000060000055
                  Rect    2000009220000105300000048000400071111167
                  Rect    3000008830000208400000029000500080003000
                  Tria     40000094400003045000007190000600
                  Pnt      100100123.      0.      0.
                  Pnt      200200035.      0.      0.
                  Pnt      3040000010.     0.      0.
                  Pnt      4000000015.     0.      0.
                  Pnt      5005000020.     0.      0.
                  Pnt      600008000.      5.      0.
                  Pnt      700000005.      5.      0.
                  Pnt      8000900010.     5.      0.
                  Pnt      9000900015.     5.      0.

                  Comment

                  • bvdet
                    Recognized Expert Specialist
                    • Oct 2006
                    • 2851

                    #10
                    Originally posted by psbasha
                    Sorry BV,the numbers will not be zero for all.It will be 8-digit number and maximum value will be 99999999
                    Code:
                    Sample.txt
                    Rect    1000007110000101200000227000000060000055
                    Rect    2000009220000105300000048000400071111167
                    Rect    3000008830000208400000029000500080003000
                    Tria     40000094400003045000007190000600
                    Pnt      100100123.      0.      0.
                    Pnt      200200035.      0.      0.
                    Pnt      3040000010.     0.      0.
                    Pnt      4000000015.     0.      0.
                    Pnt      5005000020.     0.      0.
                    Pnt      600008000.      5.      0.
                    Pnt      700000005.      5.      0.
                    Pnt      8000900010.     5.      0.
                    Pnt      9000900015.     5.      0.
                    You have lost me now. What numbers do you want to extract from 'Rect' and 'Tria'? Your data files need to be in a consistent format with predictable delimiters to parse in this manner.

                    Comment

                    • psbasha
                      Contributor
                      • Feb 2007
                      • 440

                      #11
                      Originally posted by bvdet
                      You have lost me now. What numbers do you want to extract from 'Rect' and 'Tria'? Your data files need to be in a consistent format with predictable delimiters to parse in this manner.
                      The output should look like this
                      o/p should be :

                      WireDict
                      {10000071:[10000101,20000022,70000000,60000055],
                      20000092:[20000105,30000004,80004000,71111167],
                      30000088:[30000208,40000002,90005000,80003000],
                      40000094:[40000304,50000071,90000600]}

                      pntDict
                      { 100100123:[0.0,0.0,0.0],20020003:[5.0,0.0,0.0],30400000:[10.0,0.0,0.0],
                      40000000:[15.0,0.0,0.0],50050000:[20.0,0.0,0.0],60000800:[0.0,5.0,0.0],70000000:[5.0,5.0,0.],
                      80009000:[10.0,5.0,0.0],90009000:[15.0,5.0,0.]
                      }

                      Comment

                      • psbasha
                        Contributor
                        • Feb 2007
                        • 440

                        #12
                        Originally posted by psbasha
                        The output should look like this
                        o/p should be :

                        WireDict
                        {10000071:[110000101,20000 022,70000000,60 000055],
                        20000092:[2000010,5300000 0,480004000,711 11167],
                        3000008:[830000208,40000 002,90005000,80 003000],
                        40000094:[40000304,500000 71,90000600]}

                        pntDict
                        { 100100123:[0.0,0.0,0.0],20020003:[5.0,0.0,0.0],30400000:[10.0,0.0,0.0],
                        40000000:[15.0,0.0,0.0],50050000:[20.0,0.0,0.0],60000800:[0.0,5.0,0.0],70000000:[5.0,5.0,0.],
                        80009000:[10.0,5.0,0.0],90009000:[15.0,5.0,0.]
                        }
                        We have to split the number or string into fields of 8 characters each. How can I split it without using the slicing mechanism?

                        Comment

                        • bvdet
                          Recognized Expert Specialist
                          • Oct 2006
                          • 2851

                          #13
                          Originally posted by psbasha
                          We have to break at every 8-fields of the number or string.So How can I split it ,without using slicing mechanism.
                          Why not use slices? If your data will be in 8 character fields, it seems to me that would be a good method.
                          Code:
                          def each8(item):
                              """Yield consecutive 8-character fields of item, left to right.

                              Parameters:
                                  item -- a string (or other sliceable sequence).

                              Yields:
                                  item[0:8], item[8:16], ...  The last piece may
                                  be shorter than 8 when len(item) is not a
                                  multiple of 8; it is still yielded so that a
                                  trailing field whose padding was stripped is
                                  not silently lost.  Nothing is yielded for an
                                  empty item.
                              """
                              # Stepping the range by 8 needs no division, so it
                              # behaves the same under integer and true division.
                              for start in range(0, len(item), 8):
                                  yield item[start:start + 8]
                          
                          def read_file_data(f):
                              """Parse a Pnt/Rect/Tria file whose IDs are packed 8-character fields.

                              Each line is split once into a keyword and the rest
                              of the record; both parts are lower-cased and
                              stripped.  The first 8 characters of the rest are
                              the record ID.  For 'rect'/'tria' the remainder is
                              cut into 8-character fields with each8(); for 'pnt'
                              the remainder is split on whitespace.  Blank lines
                              and lines with any other keyword are ignored.

                              NOTE: whitespace between the keyword and the ID is
                              discarded, so the ID is the first 8 non-blank
                              characters that follow the keyword.

                              Parameters:
                                  f -- path of the text file to read.

                              Returns:
                                  A tuple (ptDict, wireDict).  Keys and list
                                  items are whatever convert_data() returns.

                              Raises:
                                  IndexError -- if a recognised keyword is not
                                  followed by any data.
                              """
                              ptDict = {}
                              wireDict = {}
                              # 'with' closes the file even if a record is malformed.
                              with open(f) as dataFile:
                                  for line in dataFile:
                                      # split(None, 1) separates the keyword from the
                                      # rest on any whitespace, including a tab.
                                      lineList = [x.lower().strip() for x in line.split(None, 1)]
                                      if not lineList:
                                          # Blank line: nothing to parse.
                                          continue
                                      keyword = lineList[0]
                                      if keyword == 'rect' or keyword == 'tria':
                                          recordID, rest = lineList[1][:8], lineList[1][8:]
                                          wireDict[convert_data(recordID)] = [convert_data(x) for x in each8(rest)]
                                      elif keyword == 'pnt':
                                          recordID, rest = lineList[1][:8], lineList[1][8:]
                                          ptDict[convert_data(recordID)] = [convert_data(x) for x in rest.split()]
                              # The dictionaries must be handed back to the caller;
                              # without this the function would always return None.
                              return ptDict, wireDict
                          Code:
                          import re
                          ..................................
                              for line in fList:
                                  lineList = [x.lower().strip() for x in line.strip().split(' ', 1) if x != '']
                                  
                                  if 'rect' in lineList or 'tria' in lineList:
                                      wireDict[convert_data(lineList[1][:8])] = \
                                          [convert_data(x) for x in re.findall(r"\d{8}", lineList[1])]
                                      
                                  elif 'pnt' in lineList:
                                      ptDict[convert_data(lineList[1][:8])] = \
                                          [convert_data(y.strip()) for y in [x for x in re.split(r"\d{8}", \
                                              lineList[1]) if x != ''][0].split(' ') if y != '']
                          Take your choice. I'm no expert at regex!

                          Comment

                          • bvdet
                            Recognized Expert Specialist
                            • Oct 2006
                            • 2851

                            #14
                            I like this version of each8() better:
                            Code:
                            def each8(s):
                                """Yield s in successive pieces of at most 8 items.

                                Pieces come out left to right; the final piece is
                                shorter than 8 when len(s) is not a multiple of 8,
                                and nothing is yielded for an empty s.
                                """
                                for offset in range(0, len(s), 8):
                                    yield s[offset:offset + 8]

                            Comment

                            • psbasha
                              Contributor
                              • Feb 2007
                              • 440

                              #15
                              Code:
                              Sample1.txt
                               
                              Sample.txt
                              Pnt      100100123.      0.      0.
                              Pnt      200200035.      0.      0.
                              Pnt      3040000010.     0.      0.
                              Pnt      4000000015.     0.      0.
                              Pnt      5005000020.     0.      0.
                              Pnt      600008000.      5.      0.
                              Pnt      700000005.      5.      0.
                              Pnt      8000900010.     5.      0.
                              Pnt      9000900015.     5.      0.
                              Code:
                              Sample2.txt
                              Pnt    *         3280311       0          1.36567432E+03 -3.71226532E+02
                              *         2.01031464E+02       0
                              Pnt	 *         3280502       0          1.25433850E+03 -1.42613068E+02
                              *         1.80202667E+02       0
                              Pnt	 *         3280503       0          1.27057288E+03 -1.75843582E+02
                              *         1.84236084E+02       0
                              Pnt    *         3280504       0          1.28286145E+03 -2.01004501E+02
                              *         1.87218460E+02       0
                              Code:
                              Sample3.txt
                              Pnt*     10260209                       1156.26599      313.992828
                              *       155.018463
                              Pnt*     10270106                       1097.15002      250.676315
                              *       140.789337
                              Pnt*     10270107                       1115.47864      271.83374
                              *       144.698837

                              Comment

                              Working...