Research Article

Transcriptome Analysis of Spermophilus lateralis and Spermophilus tridecemlineatus Liver Does Not Suggest the Presence of Spermophilus-Liver-Specific Reference Genes

Algorithm 2

Python code for expression analysis.
# Input/output configuration for the correlation analysis.
# datafile      - CSV read as the full data set (ID expected in first column)
# sample_output - CSV written with the correlations for IDs in sample_list
# random_output - CSV written with the correlations for the random IDs
datafile = 'normalized_GSE2021.csv'
sample_output = 'NF_correlation.csv'
random_output = 'random_correlation.csv'
# IDs whose rows are correlated against the rest of the data set.
sample_list = ['02n12', '13d19', '05p15', '02g16', '03o23',
               '03j13', '13d08', '28h07', '13o12', '12p20']
import math
import random
def process(x, y):
    """Compute simple regression statistics of y against x.

    Args:
        x: sequence of numbers (the independent values).
        y: sequence of numbers, same length as x (the dependent values).

    Returns:
        A 5-tuple of floats ``(covariance, sd_x, sd_y, gradient, intercept)``:
        covariance - sum of products of the deviations from the means
                     (not divided by the number of points);
        sd_x, sd_y - square root of the sum of squared deviations of x and
                     of y (not divided by the number of points);
        gradient   - least-squares slope of y on x;
        intercept  - least-squares intercept of y on x.

    Raises:
        ValueError: if x is empty or x and y differ in length.
        ZeroDivisionError: if every value in x is identical, so the slope
            is undefined.
    """
    n = len(x)
    if n == 0 or n != len(y):
        raise ValueError('x and y must be non-empty and of equal length')
    mean_x = float(sum(x)) / n
    mean_y = float(sum(y)) / n
    error_x = [i - mean_x for i in x]
    # Deviations of y must be taken from mean_y; using mean_x here would
    # give a wrong sd_y.
    error_y = [i - mean_y for i in y]
    covariance = sum(ex * ey for ex, ey in zip(error_x, error_y))
    sum_sq_x = sum(ex * ex for ex in error_x)
    sd_x = math.sqrt(sum_sq_x)
    sd_y = math.sqrt(sum(ey * ey for ey in error_y))
    # The slope numerator is the same sum of products as the covariance.
    gradient = covariance / sum_sq_x
    intercept = mean_y - (gradient * mean_x)
    return (float(covariance), float(sd_x), float(sd_y),
            float(gradient), float(intercept))
# Load the full data (fdata): one list of comma-separated fields per
# non-blank line of datafile. Only the line terminator is stripped, so a
# final line without a trailing newline keeps its last character.
with open(datafile, 'r') as infile:
    fdata = [line.rstrip('\r\n').split(',')
             for line in infile if line.strip()]
# Sorting out the full data (fdata) into 3 parts
# sdata - contains data for IDs in sample_list
# rdata - contains data (n = number of IDs in sample_list)
#         for randomly chosen IDs not in sample_list
# data  - contains data for IDs not in sample_list or rdata
sample_ids = set(sample_list)
sdata = [row for row in fdata if row[0] in sample_ids]
data = [row for row in fdata if row[0] not in sample_ids]
# NOTE: random.choice draws with replacement, so rdata may contain the
# same row more than once.
rdata = [random.choice(data) for _ in range(len(sample_list))]
# Build the set of random IDs once instead of once per row of data.
random_ids = set(row[0] for row in rdata)
data = [row for row in data if row[0] not in random_ids]
# Both output files stay open here; run_correlation closes the file it
# is given once it has finished writing.
sout = open(sample_output, 'w')
rout = open(random_output, 'w')
print('%d number of samples in sample list' % len(sdata))
print('%d number of samples in random list' % len(rdata))
print('%d number of samples in data list' % len(data))
header = ','.join(['sample_x', 'sample_y', 'covariance',
                   'sd_x', 'sd_y', 'gradient', 'intercept']) + '\n'
sout.write(header)
rout.write(header)
def run_correlation(sample_data, other_data, outfile):
    """Correlate every row of sample_data against every row of other_data.

    Each row is a list whose first element is an ID and whose remaining
    elements are converted with float(). For every (sample row, other row)
    pair, process() is called on the two value lists and one CSV line
    ``ID1,ID2,covariance,sd_x,sd_y,gradient,intercept`` is written to
    outfile. A progress line is printed after each sample row.

    Args:
        sample_data: list of rows to use as the x series.
        other_data: list of rows to use as the y series.
        outfile: writable file object; it is closed before returning,
            including when an error interrupts processing.

    Raises:
        ValueError: if a value field cannot be converted to float.
    """
    try:
        # Parse other_data once rather than once per sample row.
        others = [(row[0], [float(v) for v in row[1:]])
                  for row in other_data]
        # Start at 0 so the progress line reports pairs actually processed.
        count = 0
        for s in sample_data:
            ID1 = s[0]
            d1 = [float(v) for v in s[1:]]
            for ID2, d2 in others:
                result = process(d1, d2)
                fields = [ID1, ID2] + [str(value) for value in result[:5]]
                outfile.write(','.join(fields) + '\n')
                count = count + 1
            print('%d processed. ID %s' % (count, ID1))
    finally:
        outfile.close()
# Run the analysis twice against the same remaining data: first for the
# rows selected by sample_list, then for the randomly drawn rows.
print('Processing sample list')
run_correlation(sdata, data, sout)
print('')  # blank line between the two progress listings
print('Processing random list')
run_correlation(rdata, data, rout)