|
"""
def reverseComplement(seq):
seq = seq.upper()
seq = seq.replace('U', 'T')
rcSeq = []
for nt in seq:
if nt == 'A':
rcSeq.append('T')
elif nt == 'T':
rcSeq.append('A')
elif nt == 'C':
rcSeq.append('G')
elif nt == 'G':
rcSeq.append('C')
rcSeq.reverse()
rcSeq = ''.join(rcSeq)
if len(rcSeq) != len(seq):
print "Incomplete reverse complement conversion"
sys.exit()
return rcSeq
def possibleSeedMatches(smRegion):
seedMatches = {}
# make possible 6mers
seedMatch = smRegion[1:7]
for t9 in ['A','C','G','T']:
for t8 in ['A','C','G','T']:
if t8 == smRegion[0]: continue
for t1 in ['C','G','T']:
seedMatches[t9 + t8 + seedMatch + t1] = 't2t7'
# make possible A1 7mers
for t9 in ['A','C','G','T']:
for t8 in ['A','C','G','T']:
if t8 == smRegion[0]: continue
seedMatches[t9 + t8 + seedMatch + 'A'] = 't1At7'
# make possible M8 7mers
for t9 in ['A','C','G','T']:
for t1 in ['C','G','T']:
seedMatches[t9 + smRegion[0] + seedMatch + t1] = 't2t8'
# make possible 8mers
for t9 in ['A','C','G','T']:
seedMatches[t9 + smRegion[0] + seedMatch + 'A'] = 't1At8'
# for sm in seedMatches:
# print sm, seedMatches[sm]
# print len(seedMatches), "possible seed matches generated"
return seedMatches
def parseSiInput():
opts,args = getopt.getopt(sys.argv[1:], "",
["org=", "si=", "miRname="])
if not opts:
print "Need to provide command-line options:"
print "--org and --si or --miRname"
sys.exit()
inputOrg, si, miRname = None, None, None
for o,a in opts:
if o in ("--org"):
inputOrg = a
if o in ("--si"):
si = a
if o in ("--miRname"):
miRname = a
# handle an empty form - org will default to 'Human'
if not si and not miRname:
print "No input provided"
sys.exit()
# handle dual input case
if si and miRname:
print "Both a miRNA/siRNA sequence and a miRBase miRNA name were submitted. Please input just one or the other."
sys.exit()
# handle miRNA name input
if not si and miRname:
inputOrg, siSeqs, error = convertNameToSeqAndOrg(miRname)
if error:
print error
sys.exit()
elif si and not miRname:
siSeqs = si.split(',')
org = ORGs[inputOrg]
return siSeqs, org
def retrieveTargets(path, org, word):
fn = path + org + '/wordIndices/' + word[0] + '/' + word + '_index.pickle'
if not os.path.exists(fn): return None
fh = open(fn, 'rb')
targets = cPickle.load(fh)
fh.close()
# print len(targets), "targets retrieved"
return targets
def retrieveGeneIDs(path, org):
fn = path + org + '/geneNames.pickle'
fh = open(fn, 'rb')
geneIDs = cPickle.load(fh)
fh.close()
return geneIDs
def scoreTarget(sm, info):
complete = True
# sm[0] is the t9 position
seedType = info['seedType']
score = SEED_WEIGHTS[seedType][sm[0]]
if seedType in ['t1At7', 't2t8', 't1At8']:
# upstream conservation
upCon = info['upCon']
if upCon == 'NA':
upConScore = 0.0
complete = False
else:
if upCon <= BIN_CUTOFFS['upCon']['1'][-1]:
bin = '1'
elif upCon >= BIN_CUTOFFS['upCon']['3'][0]:
bin = '3'
else:
bin = '2'
upConScore = BIN_LFCs['upCon'][bin] \
- nLCF_MEANS['upCon']
# downstream AU content
downAU = info['downAU']
if downAU == 'NA':
downAUScore = 0.0
complete = False
else:
if downAU <= BIN_CUTOFFS['downAU']['1'][-1]:
bin = '1'
elif downAU >= BIN_CUTOFFS['downAU']['3'][0]:
bin = '3'
else:
bin = '2'
downAUScore = BIN_LFCs['downAU'][bin] \
- nLCF_MEANS['downAU']
score += upConScore + downAUScore
return score, complete
def scoreAllTargets(seedMatches, targetsBySM):
scoredTargets = {}
# determine if all seed matches have a single seed match type
mixed = False
for sm in seedMatches:
if len(seedMatches[sm]) > 1:
mixed = True
break
for sm in targetsBySM:
# print '\n', sm, seedMatches[sm]
for id in targetsBySM[sm]:
# print id
scoredTargets.setdefault(id, {'name': None, 'product': None,
'score': 0.0, 'complete': True,
'total': 0, 'mixed': mixed})
# if some mixed type, set all type counts to 'NA'
for type in ['con_t2t7', 'con_t1At7',
'con_t2t8', 'con_t1At8',
'nc_t2t7', 'nc_t1At7',
'nc_t2t8', 'nc_t1At8']:
if mixed:
scoredTargets[id][type] = 'NA'
else:
scoredTargets[id].setdefault(type, 0)
# score each match
smScore = 0.0
for match in targetsBySM[sm][id]:
# print match
score,complete = scoreTarget(sm, match)
# print 'single match score:', score
smScore += score
if not complete: scoredTargets[id]['complete'] = False
scoredTargets[id]['name'] = match['name']
scoredTargets[id]['product'] = match['product']
# only record conservation type if single seed match type
if not mixed:
if seedMatches[sm][0] == 't2t7':
if match['con_m2m7'] == 1:
scoredTargets[id]['con_t2t7'] += 1
else:
scoredTargets[id]['nc_t2t7'] += 1
elif seedMatches[sm][0] == 't1At7':
if match['con_m1m7'] == 1:
scoredTargets[id]['con_t1At7'] += 1
else:
scoredTargets[id]['nc_t1At7'] += 1
elif seedMatches[sm][0] == 't2t8':
if match['con_m2m8'] == 1:
scoredTargets[id]['con_t2t8'] += 1
else:
scoredTargets[id]['nc_t2t8'] += 1
elif seedMatches[sm][0] == 't1At8':
if match['con_m1m8'] == 1:
scoredTargets[id]['con_t1At8'] += 1
else:
scoredTargets[id]['nc_t1At8'] += 1
# correct for multiple seed match types
smScore = smScore/len(seedMatches[sm])
# print "seed match score:", smScore
scoredTargets[id]['score'] += smScore
# print scoredTargets[id]['score']
scoredTargets[id]['total'] += len(targetsBySM[sm][id])/len(seedMatches[sm])
return scoredTargets
def outputTargetsAsText(siSeqs, inputOrg):
# org = ORGs[inputOrg]
# siSeqs = siSeqs.split(',')
targets, error = targetRank(siSeqs, inputOrg)
if error: return error
targetIDs = targets.keys()
targetIDs.sort(lambda x,y: cmp(targets[x]['score'], targets[y]['score']))
targetIDs.reverse()
seedTypes = ['con_t1At8', 'con_t2t8', 'con_t1At7', 'con_t2t7',
'nc_t1At8', 'nc_t2t8', 'nc_t1At7', 'nc_t2t7']
s = 'Rank, Gene Name, Gene/Isoform Description, Refseq ID, TargetRank Score, '
s += 'Conserved Seed Matches: 8mer, M8 7mer, A1 7mer, 6mer, '
s += 'Non-conserved Seed Matches: 8mer, M8 7mer, A1 7mer, 6mer\n'
sameRankCount = 1
rank = 1
prevScore = targets[targetIDs[0]]['score']
for i,id in enumerate(targetIDs):
score = targets[id]['score']
# filter for minimum score
if score < MIN_SCORE: continue
if score != prevScore:
rank += sameRankCount
sameRankCount = 1
prevScore = targets[id]['score']
elif i != 0:
sameRankCount += 1
firstID = id.split('|')[0]
s += "%s, " %rank
s += "%s, " %targets[id]['name']
s += "%s, " %targets[id]['product'].replace(',', ' -')
s += "%s, " %firstID
s += "%.4f, " %targets[id]['score']
s += "%s, " %targets[id]['total']
s += "%s, " %targets[id]['con_t1At8']
s += "%s, " %targets[id]['con_t2t8']
s += "%s, " %targets[id]['con_t1At7']
s += "%s, " %targets[id]['con_t2t7']
s += "%s, " %targets[id]['nc_t1At8']
s += "%s, " %targets[id]['nc_t2t8']
s += "%s, " %targets[id]['nc_t1At7']
s += "%s\n" %targets[id]['nc_t2t7']
return s
def convertNameToSeqAndOrg(miRnames):
inputOrg, seqs, error = None, [], None
miRnames = miRnames.split(',')
for miRname in miRnames:
miRname = miRname.lower().strip().replace("\'", '').replace('\"', '')
# deduce organism
if miRname.startswith('hsa'):
org = 'hg18'
if inputOrg and inputOrg != 'Human':
error = HTML_HEADER
error += """"""
error += """"""
error += """ """
error += HTML_FOOTER
return None, None, error
inputOrg = 'Human'
elif miRname.startswith('mmu'):
org = 'mm8'
if inputOrg and inputOrg != 'Mouse':
error = HTML_HEADER
error += """"""
error += """"""
error += """ """
error += HTML_FOOTER
return None, None, error
inputOrg = 'Mouse'
else:
# error if not a human or mouse miRname
error = HTML_HEADER
error += """"""
error += """""" %miRname
error += """ """
error += HTML_FOOTER
return None, None, error
# open the appropriate miRBase file
fn = PATH + org + '/miRBase_10.pickle'
fh = open(fn, 'rb')
miRseeds = cPickle.load(fh)
fh.close()
if not miRseeds.has_key(miRname):
# error if unknown miRname
error = HTML_HEADER
error += """
""" %miRname
error += HTML_FOOTER
return None, None, error
seqs.append(miRseeds[miRname])
return inputOrg, seqs, error
def outputTargetsAsHTML(si=None, inputOrg=None, miRname=None):
# handle an empty form - org will default to 'Human'
if not si and not miRname:
error = HTML_HEADER
error += """"""
error += """"""
error += """ """
error += HTML_FOOTER
return error
# handle dual input case
if si and miRname:
error = HTML_HEADER
error += """"""
error += """"""
error += """ """
error += HTML_FOOTER
return error
# handle miRNA name input
if not si and miRname:
inputOrg, siSeqs, error = convertNameToSeqAndOrg(miRname)
if error: return error
elif si and not miRname:
siSeqs = si.split(',')
displayCount = 100
org = ORGs[inputOrg]
if org == 'hg18':
orgName = 'Homo+sapiens'
elif org == 'mm8':
orgName = 'Mus+musculus'
targets, error = targetRank(siSeqs, org)
if error: return error
targetIDs = targets.keys()
targetIDs.sort(lambda x,y: cmp(targets[x]['score'], targets[y]['score']))
targetIDs.reverse()
s = HTML_HEADER
# filter targets for a min score
filteredTargetIDs = []
for id in targetIDs:
if targets[id]['score'] < MIN_SCORE:
continue
filteredTargetIDs.append(id)
# add a table for input description
s += """"""
if miRname:
s += """%s input miRNA name(s): %s | """ %(inputOrg, miRname)
else:
s += """%s input sequence(s) (5' - 3' orientation): %s | """ %(inputOrg, ','.join(siSeqs))
if len(filteredTargetIDs) < displayCount:
s += """ \
%s targets found |
""" %len(filteredTargetIDs)
else:
s += """ \
%s targets found (top %s displayed) |
""" %(len(filteredTargetIDs), displayCount)
# add a 'download all' button
s += """ \
|
""" %(','.join(siSeqs), inputOrg)
# add a table for targets
s += """ \
"""
seedTypes = ['con_t1At8', 'con_t2t8', 'con_t1At7', 'con_t2t7',
'nc_t1At8', 'nc_t2t8', 'nc_t1At7', 'nc_t2t7']
sameRankCount = 1
rank = 1
prevScore = targets[filteredTargetIDs[0]]['score']
for i,id in enumerate(filteredTargetIDs[:100]):
score = targets[id]['score']
if score != prevScore:
rank += sameRankCount
sameRankCount = 1
prevScore = targets[id]['score']
elif i != 0:
sameRankCount += 1
firstID = id.split('|')[0]
s += ""
s += """%s | """ %rank
s += """%s | """ %(targets[id]['name'],orgName, targets[id]['name'])
s += "%s | " %targets[id]['product']
s += "%s | " %firstID
s += """%.4f | """ %targets[id]['score']
s += """%s | """ %targets[id]['total']
s += """%s | """ %targets[id]['con_t1At8']
s += """%s | """ %targets[id]['con_t2t8']
s += """%s | """ %targets[id]['con_t1At7']
s += """%s | """ %targets[id]['con_t2t7']
s += """%s | """ %targets[id]['nc_t1At8']
s += """%s | """ %targets[id]['nc_t2t8']
s += """%s | """ %targets[id]['nc_t1At7']
s += """%s | """ %targets[id]['nc_t2t7']
s += " "
s += """\
|