
dict, for "dictionary". dict can be called with a collection argument to create a dictionary with the elements of the argument:
dict((('A','adenine'),('T', 'thymine'), ('C','cytosine'),('G','guanine')))
{'A': 'adenine', 'C': 'cytosine', 'G': 'guanine', 'T': 'thymine'}
# dict does not allow keys to be instances of mutable built-in types.

# Standard genetic code: maps each three-base RNA codon to the three-letter
# abbreviation of the amino acid it encodes. '---' marks a stop codon.
RNA_codon_table = {
    #                     Second Base
    #       U             C             A             G
    # U
    'UUU': 'Phe', 'UCU': 'Ser', 'UAU': 'Tyr', 'UGU': 'Cys',  # UxU
    'UUC': 'Phe', 'UCC': 'Ser', 'UAC': 'Tyr', 'UGC': 'Cys',  # UxC
    'UUA': 'Leu', 'UCA': 'Ser', 'UAA': '---', 'UGA': '---',  # UxA
    'UUG': 'Leu', 'UCG': 'Ser', 'UAG': '---', 'UGG': 'Trp',  # UxG
    # C
    'CUU': 'Leu', 'CCU': 'Pro', 'CAU': 'His', 'CGU': 'Arg',  # CxU
    'CUC': 'Leu', 'CCC': 'Pro', 'CAC': 'His', 'CGC': 'Arg',  # CxC
    'CUA': 'Leu', 'CCA': 'Pro', 'CAA': 'Gln', 'CGA': 'Arg',  # CxA
    'CUG': 'Leu', 'CCG': 'Pro', 'CAG': 'Gln', 'CGG': 'Arg',  # CxG
    # A
    'AUU': 'Ile', 'ACU': 'Thr', 'AAU': 'Asn', 'AGU': 'Ser',  # AxU
    'AUC': 'Ile', 'ACC': 'Thr', 'AAC': 'Asn', 'AGC': 'Ser',  # AxC
    'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg',  # AxA
    'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg',  # AxG
    # G
    'GUU': 'Val', 'GCU': 'Ala', 'GAU': 'Asp', 'GGU': 'Gly',  # GxU
    'GUC': 'Val', 'GCC': 'Ala', 'GAC': 'Asp', 'GGC': 'Gly',  # GxC
    'GUA': 'Val', 'GCA': 'Ala', 'GAA': 'Glu', 'GGA': 'Gly',  # GxA
    'GUG': 'Val', 'GCG': 'Ala', 'GAG': 'Glu', 'GGG': 'Gly',  # GxG
}
def translate_RNA_codon(codon):
    """Return the amino-acid abbreviation for an RNA codon.

    The codon is looked up in the module-level RNA_codon_table; a codon
    that is not a key of that table raises KeyError.
    """
    return RNA_codon_table[codon]
# Look up a single codon; as a bare expression the result is only shown
# in an interactive session.
translate_RNA_codon('GUG')
# Show the whole table using the default dict representation.
RNA_codon_table
from pprint import pprint as pp
# Pretty-print the same table.
pp(RNA_codon_table)


# Materialize the table's keys (the codons) as a list.
list(RNA_codon_table.keys())
generator. A file object is an interface to an external file, not the file itself. open(path, mode) creates a file object representing the external file at the operating system location specified by the string path.
Use close() to close a file object when it’s no longer needed. The with statement is used to open and name a file, then automatically close the file regardless of whether an error occurs during the execution of its statements.
with open(path, mode) as name: statements using name
with open(path1, mode1) as name1, open(path2, mode2) as name2, ... : statements using names
# fileobj.read([count]) - Reads count bytes, or until the end of the file,
#   whichever comes first; if count is omitted, reads everything until the
#   end of the file. If at the end of the file, returns an empty string.
#   This method treats the file as an input stream of characters.
# fileobj.readline([count]) - Reads one line from the file object and
#   returns the entire line, including the end-of-line character; if count
#   is present, reads at most count characters. If at the end of the file,
#   returns an empty string. This method treats the file as an input stream
#   of lines.
# fileobj.readlines() - Reads lines of a file object until the end of the
#   file is reached and returns them as a list of strings; this method
#   treats the file as an input stream of lines.
# fileobj.write(string) - Writes string to fileobj, treating it as an
#   output stream of characters.
# fileobj.writelines(sequence) - Writes each element of sequence, which
#   must all be strings, to fileobj, treating it as an output stream of
#   lines.


def read_FASTA_strings(filename):
    """Read the FASTA entries of a file as raw strings.

    The whole file is read and split at every '>'. The text before the
    first '>' is discarded, so each returned string is one entry without
    its leading '>' (description line, newline, then the sequence lines
    with their newlines intact).
    """
    with open(filename) as file:
        return file.read().split('>')[1:]
# Load the raw FASTA entries of the sample file.
seqs = read_FASTA_strings("data/aa003.fasta")
# Bare expression: the value is only displayed in an interactive session.
seqs

(random.randint, random.randint) A value is obtained from a generator by calling next with the generator object as its argument. The generator's code then runs until a yield statement is encountered. The value of the yield is returned as the value of next.
next(generator[, default]) - Gets the next value from the generator object; if the generator has no more values to produce, returns default, raising an error if no default value was specified.
def genTest():
    """Minimal generator function: yields 1, then 2, then is exhausted."""
    yield 1
    yield 2
# Calling a generator function runs none of its body; it only creates a
# generator object.
genTest()
foo = genTest()
# Advance the generator by one step (produces the first yielded value).
next(foo)
# A for loop calls next repeatedly until the generator is exhausted.
for n in genTest():
    print(n)
def genFib():
    """Yield Fibonacci numbers without end, starting 1, 2, 3, 5, 8, ...

    The generator never terminates on its own; callers decide how many
    values to take.
    """
    fibn_1 = 1  # fib(n - 1)
    fibn_2 = 0  # fib(n - 2)
    while True:
        # Named fibn rather than `next` so the builtin next() is not shadowed.
        fibn = fibn_1 + fibn_2  # fib(n) = fib(n - 1) + fib(n - 2)
        yield fibn
        fibn_2 = fibn_1
        fibn_1 = fibn
# Print the first ten values produced by the Fibonacci generator.
fib = genFib()
for _ in range(10):
    print(next(fib))
The simplest form of list comprehension is:
[expression for item in collection]
def validate_base_sequence(base_sequence, RNAflag=False):
    """Return True if every character of base_sequence is a valid base.

    Valid bases are 'UCAG' when RNAflag is true, otherwise 'TCAG'. The
    check is case-insensitive, and an empty sequence is considered valid.
    """
    valid_bases = 'UCAG' if RNAflag else 'TCAG'
    # A generator (not a list) lets all() stop at the first invalid base.
    return all(base in valid_bases for base in base_sequence.upper())
from random import randint
def random_base(RNAflag=False):
    """Return one randomly chosen base letter.

    The letter is drawn from 'UCAG' when RNAflag is true, otherwise from
    'TCAG'.
    """
    bases = 'UCAG' if RNAflag else 'TCAG'
    # randint includes both end points, so 0..3 covers all four letters.
    return bases[randint(0, 3)]
def random_codon(RNAflag=False):
    """Return a random codon: three random bases joined into one string.

    RNAflag is passed through to random_base to choose between the RNA
    and DNA alphabets.
    """
    return ''.join(random_base(RNAflag) for _ in range(3))
def random_codons(minlength=3, maxlength=10, RNAflag=False):
    """Generate a random list of codons (RNA if RNAflag, else DNA).

    The number of codons in the list is chosen at random between
    minlength and maxlength, inclusive.
    """
    return [random_codon(RNAflag)
            for _ in range(randint(minlength, maxlength))]
# Step-by-step walk-through of the comprehension used by random_codons,
# with its parameters bound as module-level names.
minlength = 2
maxlength = 5
RNAflag = True
# Random count between minlength and maxlength, both end points included.
randnum = randint(minlength, maxlength)
randnum
# One list element per iteration of range(randnum) ...
[n for n in range(randnum)]
# ... here each element is a fresh random codon instead of the index.
[random_codon(RNAflag) for n in range(randnum)]
def random_codons_translation(minlength=3, maxlength=10):
    """Generate a random list of amino-acid abbreviations.

    A random list of RNA codons, with a length between minlength and
    maxlength inclusive, is generated and each codon is translated with
    translate_RNA_codon.
    """
    return [translate_RNA_codon(codon)
            for codon in random_codons(minlength, maxlength, True)]
# Sample call with the default length limits; the result is only displayed
# in an interactive session.
random_codons_translation()
def test():
    """Print sample output from each of the random generator functions.

    Nothing is asserted: the output is random and meant to be inspected by
    eye. Blank lines separate the groups of related calls.
    """
    # Single DNA bases (default flag and explicit False).
    print()
    print(random_base())
    print(random_base())
    print(random_base(False))
    print(random_base(False))
    # Single RNA bases.
    print()
    print(random_base(True))
    print(random_base(True))
    print(random_base(True))
    print(random_base(True))
    # Single codons.
    print()
    print(random_codon())
    print(random_codon(False))
    print(random_codon(True))
    # Lists of DNA codons with the default length limits.
    print()
    print(random_codons())
    print(random_codons())
    print(random_codons())
    print(random_codons())
    # Lists of DNA codons with explicit length limits.
    print()
    print(random_codons(6))
    print(random_codons(6, 15))
    # Lists of RNA codons.
    print()
    print(random_codons(RNAflag = True))
    print(random_codons(RNAflag = True))
    # Translations of random RNA codon lists.
    print()
    print(random_codons_translation())
    print(random_codons_translation(5))
    print()
    print(random_codons_translation(8, 12))
    print(random_codons_translation(8, 12))
# Run the demonstration defined above.
test()
def read_FASTA_entries(filename):
    """Read a FASTA file as a list of partitioned entries.

    Each entry string from read_FASTA_strings is partitioned at its first
    newline, giving a 3-tuple (description, '\\n', sequence text); the
    sequence text still contains its internal newlines.
    """
    return [seq.partition('\n') for seq in read_FASTA_strings(filename)]
# Given a string and another string sepr, the call string.partition(sepr)
# returns a tuple with three elements: the part of string before the first
# occurrence of sepr, sepr itself, and the part after it. Partitioning at
# '\n' will split the description from the base sequence.
seqs = read_FASTA_entries("data/aa003.fasta")
# Bare expression: the value is only displayed in an interactive session.
seqs
# The newlines inside each sequence are removed with str.replace and
# another list comprehension. Splitting the file at '>' has already removed
# the '>' that begins each description.


def read_FASTA_sequences(filename):
    """Read a FASTA file as a list of [description, sequence] pairs.

    The sequence of each entry is joined into a single string by deleting
    its newlines.
    """
    return [[seq[0], seq[2].replace('\n', '')]  # delete newlines
            for seq in read_FASTA_entries(filename)]
# Load [description, sequence] pairs from the sample file.
seqs = read_FASTA_sequences("data/aa003.fasta")
# Bare expression: the value is only displayed in an interactive session.
seqs
def read_FASTA_sequences_unpacked(filename):
    """Read a FASTA file as a list of (description, sequence) tuples.

    Each 3-tuple from read_FASTA_entries is unpacked directly in the
    comprehension; the newlines inside every sequence are deleted.
    """
    return [(info, seq.replace('\n', ''))
            for info, _, seq in  # the separator element is not needed
            read_FASTA_entries(filename)]
# str.split is used to return a list of field values for the description
# instead of just a string.


def read_FASTA_sequences_and_info(filename):
    """Read a FASTA file as a list of [description fields, sequence] pairs.

    The description of every entry from read_FASTA_sequences is split at
    its vertical bars into a list of field strings.
    """
    return [[seq[0].split('|'), seq[1]]
            for seq in read_FASTA_sequences(filename)]
# The path is given literally: the module-level name `filename` is not
# assigned until later in the file, so using it here raised NameError.
seqs = read_FASTA_sequences_and_info("data/aa003.fasta")
print(seqs)
# Reading FASTA sequences with one compact function
def read_FASTA(filename):
    """Read a FASTA file as a list of (description fields, sequence) tuples.

    The file is split at every '>' (text before the first '>' is
    discarded). For each entry the first line is split at '|' into a list
    of field strings, and the remaining lines are joined into one sequence
    string by deleting their newlines.
    """
    with open(filename) as file:
        entries = file.read().split('>')[1:]
    return [(description.split('|'), sequence.replace('\n', ''))
            for description, _, sequence in
            (entry.partition('\n') for entry in entries)]
# Sample file reused by the calls that follow.
filename = 'data/aa003.fasta'
seqs = read_FASTA(filename)
# Bare expression: the value is only displayed in an interactive session.
seqs
# Set comprehension:
#     {expression for item in collection}
# Dictionary comprehension:
#     {key-expression: value-expression for key, value in collection}


def make_indexed_sequence_dictionary(filename):
    """Return a dictionary of the sequences in a FASTA file.

    The key of each sequence is the field at index 3 of its description
    (as split by read_FASTA). A description with fewer than four fields
    raises IndexError.
    """
    return {info[3]: seq for info, seq in read_FASTA(filename)}
# Build the dictionary for the file named by the module-level `filename`.
seqs = make_indexed_sequence_dictionary(filename)
# Bare expression: the value is only displayed in an interactive session.
seqs
# Generator expression:
#     (expression for item in collection)

### Generating amino acid translations of codons
def aa_generator(rnaseq):
    """Return a generator object that produces an amino acid by
    translating the next three characters of rnaseq each time next
    is called on it.

    Only complete codons are translated: one or two bases left over at
    the end of rnaseq are ignored instead of being looked up (a partial
    codon is not a key of the codon table and would raise KeyError).
    """
    # Stopping at len - 2 guarantees every slice has exactly three bases.
    return (translate_RNA_codon(rnaseq[n:n+3])
            for n in range(0, len(rnaseq) - 2, 3))
# Check the first seven translations one next() call at a time, then print
# the translation of the whole sequence from a fresh generator.
seq = 'AUUCGAUCCGGACCCAUGAUCCCG'
print()
print(seq)
gen = aa_generator(seq)
for expected in ('Ile', 'Arg', 'Ser', 'Gly', 'Pro', 'Met', 'Ile'):
    assert expected == next(gen)
# The generator above is partly consumed, so start over for the full output.
gen = aa_generator(seq)
print(''.join(gen))
[expression for element in collection if test]
Extract just sequence descriptions from a FASTA file and split them into fields at their vertical bars.
### Reading FASTA descriptions from a file
def get_FASTA_descriptions(filename):
    """Return the description lines of a FASTA file, split into fields.

    Every line that starts with '>' is a description. Its leading '>' and
    trailing newline are removed and the rest is split at the vertical
    bars, giving one list of field strings per description.
    """
    with open(filename) as file:
        # Strip the newline so it does not end up inside the last field.
        return [line[1:].rstrip('\n').split('|')
                for line in file if line.startswith('>')]
# Print the description fields of every entry in the sample file.
print(get_FASTA_descriptions('data/aa010.fasta'))
### Reading FASTA descriptions using set comprehension (reads field index 3)
def get_FASTA_codes(filename):
    """Return the set of codes found in the descriptions of a FASTA file.

    The code of a description line (one starting with '>') is the field at
    index 3 after splitting the line at its vertical bars. Descriptions
    with fewer than four fields have no such field and are skipped.
    """
    with open(filename) as file:
        # Split each description once; strip the newline so a code that is
        # the last field on its line does not carry it.
        descriptions = (line.rstrip('\n').split('|')
                        for line in file if line.startswith('>'))
        # Index 3 needs at least four fields.
        return {fields[3] for fields in descriptions if len(fields) > 3}
# Alternative input file, left disabled:
#print(get_FASTA_codes('data/BacillusSubtilisPlastmidP1414.fasta'))
# Print the set of codes found in the sample file.
print(get_FASTA_codes('data/aa010.fasta'))
### Constructing a selective dictionary
def make_gi_indexed_sequence_dictionary(filename):
    """Return a dictionary of the 'gi' sequences in a FASTA file.

    Only entries whose description has at least two fields and whose first
    field is 'gi' are included; each is keyed by its second field.
    """
    return {info[1]: seq for info, seq in read_FASTA(filename)
            if len(info) >= 2 and info[0] == 'gi'}
# Print the selective dictionary built from the sample file.
print(make_gi_indexed_sequence_dictionary('data/aa003.fasta'))
### Using a generator to find the first common element
def first_common(collection1, collection2):
    """Return the first element in collection1 that is in collection2.

    Returns None if the collections have no element in common. The search
    stops at the first match, so later elements of collection1 are never
    examined.
    """
    return next((item for item in collection1 if item in collection2), None)
# 16 is the first value of 1, 6, 11, 16, 21 that is also a multiple of 4.
print(first_common(range(1,22, 5), range(0, 22, 4)))
### A nested comprehension for generating codons
def generate_triples(chars='TCAG'):
    """Return a list of all three-character combinations of unique
    characters in chars.

    Duplicate characters in chars are used only once. The result is in a
    fixed order that follows the first occurrence of each character in
    chars.
    """
    # dict.fromkeys removes duplicates like set() does but keeps the
    # original order, so the output does not vary between runs.
    chars = list(dict.fromkeys(chars))
    return [b1 + b2 + b3 for b1 in chars for b2 in chars for b3 in chars]
# Print all triples as a list, then the same triples collected into a set.
print(generate_triples())
print(set(generate_triples()))
The key parameter is accepted by a few functions and methods.
#### key parameter
# With key=abs the elements are compared by absolute value, but the
# original element is what gets returned.
max(range(3, 7), key=abs)     # 6
max(range(-7, 3))             # 2
max(range(-7, 3), key=abs)    # -7
The value of the key argument is called on each element of the collection.
Consider a list seq_list containing RNA base sequences. We could select the sequence with the lowest GC content by calling min with key = gc_content.
The method list.sort was described. It too can take an optional key parameter - the value of key is called on each element, and the elements of the list are sorted according to the values returned.
# Sorting single letters: the default order versus a case-insensitive key.
lst = ['T', 'G', 'A', 'G', 't', 'g', 'a', 'g']
lst
# Default string ordering puts every uppercase letter before any lowercase.
lst.sort()
lst
# With key=str.lower the letters are compared ignoring case.
lst.sort(key=str.lower)
lst
# The same comparison on whole sequences: first the default ordering ...
seqs = ['TACCTATACCGGCTA', 'cacctctaccgta', 'AACCTGTCCGGCTA']
seqs.sort()
seqs
# ... then, from the same starting list, the case-insensitive ordering.
seqs = ['TACCTATACCGGCTA', 'cacctctaccgta', 'AACCTGTCCGGCTA']
seqs.sort(key = str.lower)
seqs
# A def statement creates a function object and names it. A lambda
# expression creates a function object without naming it, using the form:
#     lambda args: expression-using-args


def fn(x, y):
    """Return the sum of the squares of x and y."""
    return x*x + y*y


# The same function written as a lambda expression and then bound to the
# same name, to show the two forms are equivalent.
fn = lambda x, y: x*x + y*y
### Definition of a function with a functional argument
def some(coll, pred=lambda x: x):
    """Return true if pred(item) is true for some item in coll.

    With the default pred each item is tested for its own truth value.
    Returns False for an empty collection. Evaluation stops at the first
    item that satisfies pred.
    """
    return any(pred(item) for item in coll)
print()
print('some(range(5)) is', some(range(5)))
# Double quotes around the label: inside single quotes the inner '' ended
# the literal early, so the printed label lost its empty-string argument.
print("some((None, '', 0)) is", some((None, '', 0)))
print('some(range(5), lambda x: x > 5) is', some(range(5), lambda x: x > 5))
print('some(range(5), lambda x: x > 3) is', some(range(5), lambda x: x > 3))
When sorting a list of strings in mixed case, suppose we want to order them by size first, and then alphabetically.
# Tuples compare element by element, so (length, string) pairs sort by
# length first and then by the string.
l = [(3, 'abc'), (5, 'ghijk'), (5, 'abcde'), (2, 'bd')]
l.sort()
l
# The same ordering without storing the lengths: the key function builds
# the (length, lowercased string) pair for each element on the fly.
l = ['abc', 'ghijk', 'abcde', 'bd']
l.sort(key=lambda seq:(len(seq), seq.lower()))
l