hey!
That code seens to work but the problem is it is checking only for the first six files... i donno where is the looping problem!!
can you see it??
waiting for ur reply!
cheers!!
That code seens to work but the problem is it is checking only for the first six files... i donno where is the looping problem!!
can you see it??
Code:
from math import * def parseArray(fn, dataset=1, key='PO', term='/'): ''' Read a formatted data file in matrix format and compile data into a dictionary ''' f = open(fn) # skip to required data set for _ in range(dataset): try: line = f.next() while not line.startswith(key): line = f.next() except StopIteration, e: print 'We have reached the end of the file!' f.close() return False headerList = line.strip().split()[1:] lineList = [] line = f.next().strip() while not line.startswith(term): if line != '': lineList.append(line.strip().split()) line = f.next().strip() f.close() # Key list keys = [i[0] for i in lineList] # Values list values = [[float(s) for s in item] for item in [j[1:] for j in lineList]] # Create a dictionary from keys and values lineDict = dict(zip(keys, values)) dataDict = {} for i, item in enumerate(headerList): dataDict[item] = {} for key in lineDict: dataDict[item][key] = lineDict[key][i] # Add 1.0 to every element in dataDict subdictionaries for keyMain in dataDict: for keySub in dataDict[keyMain]: dataDict[keyMain][keySub] += 1.0 # Normalize original data (with 1 added) and update data valueSums = [sum(item)+4 for item in values] # print valueSums for keyMain in dataDict: for keySub in dataDict[keyMain]: dataDict[keyMain][keySub] /= valueSums[int(keySub)-1] return dataDict def parseData(fnSeq, dataset=1, key='>'): ''' Read a formatted data file of sequences Return a list of sequences The first element in the list is the header ''' # initialize output list dataList = [] # open file for reading f = open(fnSeq) # skip to required data set for _ in range(dataset): try: s = f.next() while not s.startswith(key): s = f.next() except StopIteration, e: print 'We have reached the end of the file!' f.close() return False # initialize output list dataList = [s,] for line in f: if not line.startswith(key): dataList.append(line.strip()) else: break f.close() return dataList def compileData(fnArray, fnSeq, arraySet=1, seqSet=1): # sequence factor dictionary value={"A":0.3,"T":0.3,"C":0.2,"G":0.2} dataArray = parseArray(fnArray, arraySet) if dataArray: dataSeq = parseData(fnSeq, seqSet) if not dataSeq: return False else: return None # This is the complete sequence seq = ''.join(dataSeq[1:]) # These are the subkeys of dataArray - '01', '02', '03',............. subKeys = dataArray['A'].keys() subKeys.sort() # Calculate num/den for each slice of sequence # Each sequence slice length = length of subKeys # Example: # seq = 'ATCGATA' # subKeys length = 3 # 'ATC', 'TCG', 'CGA', 'GAT', 'ATA' numList = [] denList = [] seqList = [] for i in xrange(len(seq) - len(subKeys)): subseq = seq[0:len(subKeys)] seqList.append(subseq) num, den = 1, 1 for j, s in enumerate(subseq): num *= dataArray[s][subKeys[j]] den *= value[s] numList.append(num) denList.append(den) seq = seq[1:] resultList = [] for i, num in enumerate(numList): #p=log10(num/denList[i]) #if (p) >=2: #print "#########",abs(int(p)) #if (log10(num/denList[i]))>=2: #print "i am here" resultList.append(int(abs(1))) #print resultList #for i in resultList: #mean=sum(resultList)/len(resultList) #sub=mean-i #queue = [] #queue = (sub)**2 #print sqrt(queue/len(resultList)) #print mean,"@@@@@@@@@@" outStr = '\n'.join(['Sequence = %s Calculation = %d' % (seqList[i], res) for i, res in enumerate(resultList)]) #print "this is line 294" return 'Array set # = %d\nSequence set # = %d\nSequence Header: %s\n%s' % (arraySet, seqSet, dataSeq[0], outStr) if __name__ == '__main__': fnArray ='one_redfly.transfac' fnSeq = 'deepthi/upstream_regions' import os dir_name='upstream_regions' fList=os.listdir(dir_name) fList1=[os.path.join(dir_name,f) for f in fList if os.path.isfile(os.path.join(dir_name,f))] seqSetIndex=0 fnSeq=fList1[seqSetIndex] while True: outputfile = "sequence_calc_data.txt" arraySet = 1 outList = [] calcdata = 1 while not calcdata is None: seqSet = 1 while True: calcdata = compileData(fnArray, fnSeq, arraySet, seqSet) print calcdata if calcdata: outList.append(calcdata) seqSet += 1 else: break arraySet += 1 seqSetIndex+=1 else: break f = open(outputfile, 'w') f.write('\n'.join(outList)) f.close() #f=open(outputfile,"r") #file_con=f.readlines() #for line in file_con: # print line
cheers!!
Comment