INPUTS: DGV.xls, chr122.gc, chr122.map step 1 |
segments_gc = segmenting(smoothing(chr122.gc)) |
FOR i IN 1 TO number_of_items_in_segments_gc |
IF (segments_gc[i].value < th1 OR segments_gc[i].value > th2) AND size(segments_gc[i].loci) > 500 |
regions_suspicious = regions_suspicious ∪ segments_gc[i].loci step 2 |
END |
END |
segments_map=segmenting(smoothing(chr122.map)) |
FOR i IN 1 TO number_of_items_in_segments_map |
IF segments_map[i].value < th3 AND size(segments_map[i].loci) > 500 |
regions_suspicious = regions_suspicious ∪ segments_map[i] step 3 |
END |
END |
FOR i IN 1 TO number_of_items_in_DGV.xls |
variant_supporting = DGV[i].ID, DGV[i].chr, DGV[i].loci, DGV[i].subtype, DGV[i].ref, |
DGV[i].method, DGV[i].samples step 4 |
IF variant_supporting.method = ‘sequencing’ AND variant_supporting.subtype = (‘loss’ OR ‘deletion’) |
AND size(variant_supporting.loci) > 10000 AND variant_supporting.samples ≠ empty step 5 |
AND (∃ j such that F-score(variant_supporting.loci, regions_suspicious[j].loci) ≥ 0.9) step 7 |
IF variant_supporting is a duplicated item (chr and loci fields are the same as an existing item) |
merge its ID, ref, and samples fields into the existing item step 6 |
ELSE variants_suspicious = variants_suspicious ∪ variant_supporting step 7 |
END |
END |
END |
list_samples = variants_suspicious.samples |
download sequencing data of samples listed in list_samples, and preprocess to get BAM files. step 8 |
FOR i IN 1 TO number_of_items_in_variants_suspicious |
calculate PEM[i] and DOC[i] from BAM file(s) of variants_suspicious[i].samples step 9 |
IF PEM[i] meets false detection criterion step 10 |
variants_suspicious[i].false = T; plot figure |
ELSE variants_suspicious[i].false = F |
END |
END |
OUTPUTS: variants_suspicious, .figure step 11 |