Dataset Paper

Extending MapMan Ontology to Tobacco for Visualization of Gene Expression

Listing 1

Processing BLAST XML output into a comma-delimited file.
from Bio.Blast import NCBIXML
# Convert BLAST XML output ('blast_output.xml') into a comma-delimited text
# file ('blast_output.txt'), keeping only HSPs whose expect value is below
# the threshold.
#
# Each output line has five comma-separated fields:
#   1. first whitespace-delimited token of the query description
#   2. second whitespace-delimited token of the alignment title, truncated
#      at its first '.'
#   3. HSP identities divided by the query length
#   4. query length (written as a float, e.g. "123.0")
#   5. HSP expect value

# Only HSPs with an expect value strictly below this are written.
expect_threshold = 1e-9

# The output file is opened first so it is created/truncated even if the
# XML input cannot be opened; both handles are closed on exit, including
# when an exception is raised mid-parse.
with open('blast_output.txt', 'w') as outfile, \
        open('blast_output.xml') as xml_handle:
    for record in NCBIXML.parse(xml_handle):
        query_title = record.query.split()[0]
        # Converted to float once so the identity ratio below is a true
        # (non-integer) division.
        query_length = float(record.query_length)
        for alignment in record.alignments:
            # NOTE(review): assumes the alignment title has at least two
            # whitespace-separated tokens; otherwise this raises IndexError.
            subject_id = alignment.title.split()[1].split('.')[0]
            for hsp in alignment.hsps:
                if float(hsp.expect) < expect_threshold:
                    data = [
                        query_title,
                        subject_id,
                        str(float(hsp.identities) / query_length),
                        str(query_length),
                        str(hsp.expect),
                    ]
                    # One record per line, terminated by a real newline.
                    outfile.write(','.join(data) + '\n')