diff options
| author | Jon Dufresne <jon.dufresne@gmail.com> | 2018-12-22 09:28:48 -0800 |
|---|---|---|
| committer | Jon Dufresne <jon.dufresne@gmail.com> | 2018-12-22 13:46:56 -0800 |
| commit | de8326d00dffdb500c02839a98330b869c2457f3 (patch) | |
| tree | 6c5fdae41cf8b335ff1c64f37856786523e4fd0d /examples/partial_gene_match.py | |
| parent | 59dfd314c23fd653271bdad37631f0497e8ad748 (diff) | |
| download | pyparsing-git-de8326d00dffdb500c02839a98330b869c2457f3.tar.gz | |
Trim trailing white space throughout the project
Many editors clean up trailing white space on save. By removing it all
in one go, it helps keep future diffs cleaner by avoiding spurious white
space changes on unrelated lines.
Diffstat (limited to 'examples/partial_gene_match.py')
| -rw-r--r-- | examples/partial_gene_match.py | 130 |
1 files changed, 65 insertions, 65 deletions
diff --git a/examples/partial_gene_match.py b/examples/partial_gene_match.py index 8bf5f7c..e19cf8b 100644 --- a/examples/partial_gene_match.py +++ b/examples/partial_gene_match.py @@ -1,6 +1,6 @@ # parital_gene_match.py
#
-# Example showing how to create a customized pyparsing Token, in this case,
+# Example showing how to create a customized pyparsing Token, in this case,
# one that is very much like Literal, but which tolerates up to 'n' character
# mismatches
from pyparsing import *
@@ -12,77 +12,77 @@ datafile = urllib.request.urlopen("http://toxodb.org/common/downloads/release-6. fastasrc = datafile.read()
datafile.close()
-"""
-Sample header:
+"""
+Sample header:
>NC_001799-6-2978-2778 | organism=Toxoplasma_gondii_RH | location=NC_001799:2778-2978(-) | length=201
-"""
-integer = Word(nums).setParseAction(lambda t:int(t[0]))
-genebit = Group(">" + Word(alphanums.upper()+"-_") + "|" +
- Word(printables)("id") + SkipTo("length=", include=True) +
- integer("genelen") + LineEnd() +
- Combine(OneOrMore(Word("ACGTN")),adjacent=False)("gene"))
-
+"""
+integer = Word(nums).setParseAction(lambda t:int(t[0]))
+genebit = Group(">" + Word(alphanums.upper()+"-_") + "|" +
+ Word(printables)("id") + SkipTo("length=", include=True) +
+ integer("genelen") + LineEnd() +
+ Combine(OneOrMore(Word("ACGTN")),adjacent=False)("gene"))
+
# read gene data from .fasta file - takes just a few seconds
-genedata = OneOrMore(genebit).parseString(fastasrc)
+genedata = OneOrMore(genebit).parseString(fastasrc)
-class CloseMatch(Token):
+class CloseMatch(Token):
"""A special subclass of Token that does *close* matches. For each
close match of the given string, a tuple is returned giving the
found close match, and a list of mismatch positions."""
- def __init__(self, seq, maxMismatches=1):
- super(CloseMatch,self).__init__()
- self.name = seq
- self.sequence = seq
- self.maxMismatches = maxMismatches
- self.errmsg = "Expected " + self.sequence
- self.mayIndexError = False
- self.mayReturnEmpty = False
-
- def parseImpl( self, instring, loc, doActions=True ):
- start = loc
- instrlen = len(instring)
- maxloc = start + len(self.sequence)
-
- if maxloc <= instrlen:
- seq = self.sequence
- seqloc = 0
- mismatches = []
- throwException = False
- done = False
- while loc < maxloc and not done:
- if instring[loc] != seq[seqloc]:
- mismatches.append(seqloc)
- if len(mismatches) > self.maxMismatches:
- throwException = True
- done = True
- loc += 1
- seqloc += 1
- else:
- throwException = True
-
- if throwException:
- exc = self.myException
- exc.loc = loc
- exc.pstr = instring
- raise exc
-
- return loc, (instring[start:loc],mismatches)
+ def __init__(self, seq, maxMismatches=1):
+ super(CloseMatch,self).__init__()
+ self.name = seq
+ self.sequence = seq
+ self.maxMismatches = maxMismatches
+ self.errmsg = "Expected " + self.sequence
+ self.mayIndexError = False
+ self.mayReturnEmpty = False
+
+ def parseImpl( self, instring, loc, doActions=True ):
+ start = loc
+ instrlen = len(instring)
+ maxloc = start + len(self.sequence)
+
+ if maxloc <= instrlen:
+ seq = self.sequence
+ seqloc = 0
+ mismatches = []
+ throwException = False
+ done = False
+ while loc < maxloc and not done:
+ if instring[loc] != seq[seqloc]:
+ mismatches.append(seqloc)
+ if len(mismatches) > self.maxMismatches:
+ throwException = True
+ done = True
+ loc += 1
+ seqloc += 1
+ else:
+ throwException = True
+
+ if throwException:
+ exc = self.myException
+ exc.loc = loc
+ exc.pstr = instring
+ raise exc
+
+ return loc, (instring[start:loc],mismatches)
# using the genedata extracted above, look for close matches of a gene sequence
searchseq = CloseMatch("TTAAATCTAGAAGAT", 3)
-for g in genedata:
- print("%s (%d)" % (g.id, g.genelen))
- print("-"*24)
- for t,startLoc,endLoc in searchseq.scanString(g.gene, overlap=True):
- matched, mismatches = t[0]
- print("MATCH:", searchseq.sequence)
- print("FOUND:", matched)
- if mismatches:
- print(" ", ''.join(' ' if i not in mismatches else '*'
- for i,c in enumerate(searchseq.sequence)))
- else:
- print("<exact match>")
- print("at location", startLoc)
- print()
- print()
\ No newline at end of file +for g in genedata:
+ print("%s (%d)" % (g.id, g.genelen))
+ print("-"*24)
+ for t,startLoc,endLoc in searchseq.scanString(g.gene, overlap=True):
+ matched, mismatches = t[0]
+ print("MATCH:", searchseq.sequence)
+ print("FOUND:", matched)
+ if mismatches:
+ print(" ", ''.join(' ' if i not in mismatches else '*'
+ for i,c in enumerate(searchseq.sequence)))
+ else:
+ print("<exact match>")
+ print("at location", startLoc)
+ print()
+ print()
|
