return top results from list

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • kdt
    New Member
    • Mar 2007
    • 50

    return top results from list

    Hi,

    With a list of fixed-length strings, I want to count the occurrences of each character at each of 9 positions. I then want to return the top 2 results for each position. The result has to be a list for the function I am passing this to. The code I have so far has two rather big problems: (1) it is too slow and (2) it gives the wrong results :(

    [CODE=python]
    # Sample data: keys are 9-character patterns in which '.' is a placeholder;
    # values are integers.
    dd = {'.LEA.....': 77, 'R....L...': 8, '.L....DA.': 5, '.L.R.V..L': 4,
          'A....S.SA': 55, 'QL..L....': 5, 'M.SC.SE..': 77}


    def positionalWeights(dd, topx=2):
        """First attempt at tallying, per position, the characters in dd's keys.

        '.' characters are skipped and nine positions are tracked. As posted,
        this version does not produce a top-``topx`` list per position:

        * the tally loop appends a new ``[item, 1]`` pair for every existing
          pair that does not match, so a character can be recorded several
          times at one position;
        * ``j.sort()`` orders the two fields inside each pair instead of
          ranking the pairs by count;
        * ``x`` is set once per position, so only the first pair of a position
          yields ``topx`` entries and every later pair yields ``[]``.

        Args:
            dd: dict whose keys are the strings to scan (values are not read).
            topx: number of entries emitted for the first pair of a position.

        Returns:
            A flat list holding one sub-list per recorded pair.
        """
        posList = [[] for i in range(9)]

        for key in dd.keys():
            for i, item in enumerate(key):
                if item != '.':
                    if posList[i]==[]:
                        posList[i].append([item, 1])
                    else:
                        # posList[i] grows while it is being iterated, so the
                        # pairs appended here are visited by this same loop.
                        for c in posList[i]:
                            if c[0] != item:
                                posList[i].append([item,1])
                            else:
                                c[1] += 1

        for i in posList:
            for j in i:
                # Python 2 orders an int before a str, turning [char, count]
                # into [count, char]; Python 3 raises TypeError here.
                j.sort()

        y =[]
        for i in posList:
            x = topx
            for j, k in enumerate(i):
                z=[]
                while x > 0:
                    z.append(k[1])
                    x-=1
                y.append(z)
        return y

    # Run the tally on the sample data and show the result.
    pw = positionalWeights(dd)
    print(pw)


    >>>
    [['A', 'A'], [], [], [], [], [], [], [], ['L', 'L'], ['S', 'S'], [], ['R', 'R'], [], [], [], ['L', 'L'], ['S', 'S'], [], [], [], [], [], [], ['D', 'D'], [], ['S', 'S'], [], ['A', 'A'], []]

    [/CODE]

    Please help!
  • KaezarRex
    New Member
    • Sep 2007
    • 52

    #2
    I think I got the first part working:
    [CODE=python]posList = [[] for i in range(9)]

    for key in dd.keys():
    for i, item in enumerate(key):
    if item != '.':
    if posList[i]==[]:
    posList[i].append([item, 1])
    else:
    found = False
    for c in posList[i]:
    if c[0] == item:
    c[1] += 1
    found = True
    if not found:
    posList[i].append([item,1])[/CODE]
    You weren't checking every list in a posList index before you inserted a new list.
    Last edited by KaezarRex; Sep 18 '07, 07:03 PM. Reason: No message appeared

    Comment

    • kdt
      New Member
      • Mar 2007
      • 50

      #3
      Originally posted by KaezarRex
      I think I got the first part working:
      [CODE=python]posList = [[] for i in range(9)]

      for key in dd.keys():
      for i, item in enumerate(key):
      if item != '.':
      if posList[i]==[]:
      posList[i].append([item, 1])
      else:
      found = False
      for c in posList[i]:
      if c[0] == item:
      c[1] += 1
      found = True
      if not found:
      posList[i].append([item,1])[/CODE]
      You weren't checking every list in a posList index before you inserted a new list.
      Cheers mate, I made quite a few mistakes in this one. Finally got it working now - yippee! Still, if anyone can propose speed tips, please do: I'm using psyco, and it doesn't seem to be making much difference :S

      [CODE=python]
      def positionalWeights(dd, topx=2):
          """Return the ``topx`` most frequent characters at each of 9 positions.

          Every key of ``dd`` is scanned character by character; '.' is skipped.
          For each position the characters are ranked by descending count, with
          ties broken by descending character.

          Args:
              dd: dict whose keys are the strings to scan (values are not read).
              topx: maximum number of characters to report per position.

          Returns:
              A list of nine lists, one per position, each holding at most
              ``topx`` characters, most frequent first. A position with fewer
              distinct characters yields a shorter (possibly empty) list.
          """
          posList = [[] for i in range(9)]

          for key in dd.keys():
              for i, item in enumerate(key):
                  if item != '.':
                      found = False
                      for c in posList[i]:
                          if c[1] == item:
                              c[0] += 1
                              found = True
                              break
                      if not found:
                          posList[i].append([1, item])

          # Pairs are [count, char], so a plain sort ranks by count first.
          for i in posList:
              i.sort()
              i.reverse()

          y = []
          for i in posList:
              # Slice from the front so the top entry i[0] is included and
              # short lists do not raise IndexError.
              z = [c[1] for c in i[:max(topx, 0)]]
              print(z)
              y.append(z)
          return y

      >>>
      ['Y', 'M']
      ['V', 'M']
      ['L', 'M']
      ['Y', 'E']
      ['L', 'K']
      ['F', 'L']
      ['L', 'Y']
      ['L', 'Y']
      ['I', 'L']
      [['Y', 'M'], ['V', 'M'], ['L', 'M'], ['Y', 'E'], ['L', 'K'], ['F', 'L'], ['L', 'Y'], ['L', 'Y'], ['I', 'L']]
      [/CODE]

      Comment

      • bvdet
        Recognized Expert Specialist
        • Oct 2006
        • 2851

        #4
        See if this helps any:[code=Python]def positionalWeigh ts(dd, topx=2):
        posDict = [{} for i in range(len(dd.ke ys()[0]))]
        for item in dd.keys():
        item = list(item)
        pos = 0
        while True:
        try:
        s = item.pop(0)
        if s != '.':
        if posDict[pos].has_key(s):
        posDict[pos][s] += 1
        else:
        posDict[pos][s] = 1
        pos += 1
        except: break

        result = []

        for dd in posDict:
        a = sorted(zip(dd.v alues(), dd.keys()), reverse=True)
        print a
        try:
        result.append([[a[0][0], a[0][1]]])
        for i in range(topx-1):
        result[-1].append([a[i+1][0], a[i+1][1]])
        except IndexError, e:
        pass

        return result

        s = positionalWeigh ts(dd, 2)

        for i, item in enumerate(s):
        for j in item:
        print 'Position %d: %s = %d' % (i, j[1], j[0])

        [/code]
        Output:
        >>> [(1, 'R'), (1, 'Q'), (1, 'M'), (1, 'A')]
        [(4, 'L')]
        [(1, 'S'), (1, 'E')]
        [(1, 'R'), (1, 'C'), (1, 'A')]
        [(1, 'L')]
        [(2, 'S'), (1, 'V'), (1, 'L')]
        [(1, 'E'), (1, 'D')]
        [(1, 'S'), (1, 'A')]
        [(1, 'L'), (1, 'A')]
        Position 0: R = 1
        Position 0: Q = 1
        Position 1: L = 4
        Position 2: S = 1
        Position 2: E = 1
        Position 3: R = 1
        Position 3: C = 1
        Position 4: L = 1
        Position 5: S = 2
        Position 5: V = 1
        Position 6: E = 1
        Position 6: D = 1
        Position 7: S = 1
        Position 7: A = 1
        Position 8: L = 1
        Position 8: A = 1
        >>>

        Comment

        • kdt
          New Member
          • Mar 2007
          • 50

          #5
          Originally posted by bvdet
          See if this helps any:[code=Python]def positionalWeigh ts(dd, topx=2):
          posDict = [{} for i in range(len(dd.ke ys()[0]))]
          for item in dd.keys():
          item = list(item)
          pos = 0
          while True:
          try:
          s = item.pop(0)
          if s != '.':
          if posDict[pos].has_key(s):
          posDict[pos][s] += 1
          else:
          posDict[pos][s] = 1
          pos += 1
          except: break

          result = []

          for dd in posDict:
          a = sorted(zip(dd.v alues(), dd.keys()), reverse=True)
          print a
          try:
          result.append([[a[0][0], a[0][1]]])
          for i in range(topx-1):
          result[-1].append([a[i+1][0], a[i+1][1]])
          except IndexError, e:
          pass

          return result

          s = positionalWeigh ts(dd, 2)

          for i, item in enumerate(s):
          for j in item:
          print 'Position %d: %s = %d' % (i, j[1], j[0])

          [/code]
          Output:
          >>> [(1, 'R'), (1, 'Q'), (1, 'M'), (1, 'A')]
          [(4, 'L')]
          [(1, 'S'), (1, 'E')]
          [(1, 'R'), (1, 'C'), (1, 'A')]
          [(1, 'L')]
          [(2, 'S'), (1, 'V'), (1, 'L')]
          [(1, 'E'), (1, 'D')]
          [(1, 'S'), (1, 'A')]
          [(1, 'L'), (1, 'A')]
          Position 0: R = 1
          Position 0: Q = 1
          Position 1: L = 4
          Position 2: S = 1
          Position 2: E = 1
          Position 3: R = 1
          Position 3: C = 1
          Position 4: L = 1
          Position 5: S = 2
          Position 5: V = 1
          Position 6: E = 1
          Position 6: D = 1
          Position 7: S = 1
          Position 7: A = 1
          Position 8: L = 1
          Position 8: A = 1
          >>>

          Thanks mate, it looks like the speed issue is from another part of the program. I'll definitely use parts of this (especially for learning, I need to use try: except more) :)

          Comment

          • bvdet
            Recognized Expert Specialist
            • Oct 2006
            • 2851

            #6
            I eliminated one of the try/except blocks by replacing 'while True' with 'while len(item)' and modified the way 'result' is compiled:[code=Python]def positionalWeigh ts(dd, topx=2):
            posDict = [{} for i in range(len(dd.ke ys()[0]))]
            for item in dd.keys():
            item = list(item)
            pos = 0
            while len(item):
            s = item.pop(0)
            if s != '.':
            if posDict[pos].has_key(s):
            posDict[pos][s] += 1
            else:
            posDict[pos][s] = 1
            pos += 1
            result = [[] for i in range(len(dd.ke ys()[0]))]
            for j, dd in enumerate(posDi ct):
            a = sorted(zip(dd.v alues(), dd.keys()), reverse=True)
            try:
            for i in range(topx):
            result[j].append([a[i][0], a[i][1]])
            except IndexError, e:
            pass
            return result[/code]I am sure it can be improved.

            Comment

            • kdt
              New Member
              • Mar 2007
              • 50

              #7
              Originally posted by bvdet
              I eliminated one of the try/except blocks by replacing 'while True' with 'while len(item)' and modified the way 'result' is compiled:[code=Python]def positionalWeigh ts(dd, topx=2):
              posDict = [{} for i in range(len(dd.ke ys()[0]))]
              for item in dd.keys():
              item = list(item)
              pos = 0
              while len(item):
              s = item.pop(0)
              if s != '.':
              if posDict[pos].has_key(s):
              posDict[pos][s] += 1
              else:
              posDict[pos][s] = 1
              pos += 1
              result = [[] for i in range(len(dd.ke ys()[0]))]
              for j, dd in enumerate(posDi ct):
              a = sorted(zip(dd.v alues(), dd.keys()), reverse=True)
              try:
              for i in range(topx):
              result[j].append([a[i][0], a[i][1]])
              except IndexError, e:
              pass
              return result[/code]I am sure it can be improved.
              Thanks bvdet, definitely more concise than my attempt. I had to make some slight changes to it to get the output I wanted. However, it has a really strange property: it doesn't return the top topx results; instead it only returns the top 1 for each position regardless of the value of topx. You can, however, add y to topx, where y = the number of top results you want - 1. Strange indeed!

              [CODE=python]
              def positionalWeights(dd, topx=5):
                  """Return the most frequent characters at each position of dd's keys.

                  The number of positions is the length of the first key. Every
                  key is scanned character by character; '.' is skipped. For each
                  position the characters are ranked by descending count, with
                  ties broken by descending character.

                  Args:
                      dd: non-empty dict whose keys are the strings to scan
                          (values are not read).
                      topx: ranking depth; see the note on ``topx + 1`` below.

                  Returns:
                      A list with one list of characters per position, most
                      frequent first, holding at most ``topx + 1`` characters.

                  Raises:
                      IndexError: if ``dd`` is empty, or a key is longer than
                          the first key.
                  """
                  width = len(list(dd.keys())[0])
                  posDict = [{} for i in range(width)]
                  for item in dd.keys():
                      for pos, s in enumerate(item):
                          if s != '.':
                              if s in posDict[pos]:
                                  posDict[pos][s] += 1
                              else:
                                  posDict[pos][s] = 1
                  result = []
                  for counts in posDict:
                      a = sorted(zip(counts.values(), counts.keys()), reverse=True)
                      # need to add 1 to return top 2
                      # NOTE(review): as written this yields up to topx + 1
                      # characters per position, not topx.
                      # changed for top topx results, no counts required
                      result.append([char for _count, char in a[:max(topx + 1, 0)]])
                  return result

              >>>
              [['F', 'M'], ['L', 'M'], ['A', 'M'], ['F', 'E'], ['Y', 'K'], ['I', 'L'], ['F', 'Y'], ['S', 'Y'], ['V', 'L']]

              [/CODE]

              Comment

              • bvdet
                Recognized Expert Specialist
                • Oct 2006
                • 2851

                #8
                This code seems to work correctly without adding 1 to 'topx':[code=Python]def positionalWeigh ts(dd, topx=2):
                posDict = [{} for i in range(len(dd.ke ys()[0]))]
                for item in dd.keys():
                item = list(item)
                pos = 0
                while len(item):
                s = item.pop(0)
                if s != '.':
                if posDict[pos].has_key(s):
                posDict[pos][s] += 1
                else:
                posDict[pos][s] = 1
                pos += 1
                print posDict
                result = [[] for i in range(len(dd.ke ys()[0]))]
                for j, dd in enumerate(posDi ct):
                a = sorted(zip(dd.v alues(), dd.keys()), reverse=True)
                print a
                try:
                for i in range(topx):
                # with counts
                # result[j].append([a[i][0], a[i][1]])
                # without counts
                result[j].append(a[i][1])
                except IndexError, e:
                pass
                return result

                dd ={'.LEA.....':7 7,'R....L...':8 ,'.L....DA.':5, '.L.R.V..L':4,\
                'A....S.SA':55, 'QL..L....':5,' M.SC.SE..':77, '.LEADER..':5,\
                '..LEADER.':5, '.LL..TT..':5, 'AZR..TFGG':5}

                s = positionalWeigh ts(dd, 3)
                print s[/code]Output:
                Code:
                >>> [{'A': 2, 'Q': 1, 'R': 1, 'M': 1}, {'Z': 1, 'L': 6}, {'S': 1, 'R': 1, 'E': 2, 'L': 2}, {'A': 2, 'C': 1, 'R': 1, 'E': 1}, {'A': 1, 'D': 1, 'L': 1}, {'E': 1, 'D': 1, 'L': 1, 'S': 2, 'T': 2, 'V': 1}, {'F': 1, 'R': 1, 'E': 2, 'D': 1, 'T': 1}, {'A': 1, 'S': 1, 'R': 1, 'G': 1}, {'A': 1, 'L': 1, 'G': 1}]
                [(2, 'A'), (1, 'R'), (1, 'Q'), (1, 'M')]
                [(6, 'L'), (1, 'Z')]
                [(2, 'L'), (2, 'E'), (1, 'S'), (1, 'R')]
                [(2, 'A'), (1, 'R'), (1, 'E'), (1, 'C')]
                [(1, 'L'), (1, 'D'), (1, 'A')]
                [(2, 'T'), (2, 'S'), (1, 'V'), (1, 'L'), (1, 'E'), (1, 'D')]
                [(2, 'E'), (1, 'T'), (1, 'R'), (1, 'F'), (1, 'D')]
                [(1, 'S'), (1, 'R'), (1, 'G'), (1, 'A')]
                [(1, 'L'), (1, 'G'), (1, 'A')]
                [['A', 'R', 'Q'], ['L', 'Z'], ['L', 'E', 'S'], ['A', 'R', 'E'], ['L', 'D', 'A'], ['T', 'S', 'V'], ['E', 'T', 'R'], ['S', 'R', 'G'], ['L', 'G', 'A']]
                >>>

                Comment

                • kdt
                  New Member
                  • Mar 2007
                  • 50

                  #9
                  Sorry, please disregard my last post; I was being silly again - passing the wrong values to the function. All's good now.

                  Thanks
                  Last edited by kdt; Sep 19 '07, 01:46 PM. Reason: too long

                  Comment

                  Working...