Convert beagle to VCF user Kantale
From PyPedia
Contents |
[edit] Documentation
Convert from beagle to Variant Call Format
[edit] Parameters
<inputs> </inputs>
[edit] Return
[edit] See also
[edit] Code
#Two Columns per sample def Convert_beagle_to_VCF_user_Kantale( input_beagle_filename = None, input_markers_filename = None, output_VCF_filename = None, chromosome = None, phased = False, ): input_beagle_file = open(input_beagle_filename) str_sep = "|" if phased else "/" #Read markers file markers = {} for x in open(input_markers_filename).readlines(): xs = x.replace("\n", "").split() markers[xs[0]] = xs[1:] output_VCF_file = open(output_VCF_filename, "w") sample_names = None lines = 0 while True: beagle_line = input_beagle_file.readline() if not beagle_line: break lines += 1 if lines % 10000 == 0: print "lines:", lines beagle_line_splitted = beagle_line.replace("\n", "").split() if not sample_names: if beagle_line_splitted[0] == "I": sample_names = [x for index, x in enumerate(beagle_line_splitted[2:]) if index % 2 == 1] header, header_str = Default_VCF_header_user_Kantale(sample_names = sample_names) output_VCF_file.write(header_str) continue if beagle_line_splitted[0] == "M": genotypes_1 = [x for index, x in enumerate(beagle_line_splitted[2:]) if x != "0" and index % 2 == 1] genotypes_2 = [x for index, x in enumerate(beagle_line_splitted[2:]) if x != "0" and index % 2 == 0] allele1 = [x for x in genotypes_1 if x != "0"] if len(allele1) != 0: allele1 = allele1[0] else: print lines print "No allele1 info:", genotypes_1, genotypes_2 continue allele2 = [x for x in genotypes_1 if x != allele1] if len(allele2) != 0: allele2 = allele2[0] else: allele2 = [x for x in genotypes_2 if x != allele1] if len(allele2) != 0: allele2 = allele2[0] else: allele2 = None #print lines #print beagle_line_splitted #raise Exception("oups") if allele2: maf, minor_allele, obs = Minor_allele_frequency_user_Kantale( allele1 = allele1, allele2 = allele2, observations_chr1 = genotypes_1, observations_chr2 = genotypes_2) if minor_allele == allele1: major_allele = allele2 elif minor_allele == allele2: major_allele = allele1 else: raise Exception("WTF!") else: minor_allele = None major_allele = allele2 if markers.has_key(beagle_line_splitted[1]): mf = markers[beagle_line_splitted[1]] a1 = mf[1] a2 = mf[2] to_print = [chromosome, mf[0], beagle_line_splitted[1], a1, a2, ".", ".",".","GT"] for i in xrange(len(beagle_line_splitted[2:])/2): current = [] for g in beagle_line_splitted[2+i:4+i]: if g == a1: current += "0" elif g == a2: current += "1" elif g == "0": current += "." else: raise Exception("Ehm...") to_print += [str.join(str_sep, current)] output_VCF_file.write(str.join("\t", to_print) + "\n") else: print lines print "Could not find: " + beagle_line_splitted[1] input_beagle_file.close() output_VCF_file.close()
[edit] Unit Tests
def uni1(): return True
[edit] Development Code
def Convert_beagle_to_VCF_user_Kantale(): pass
[edit] Permissions
[edit] Documentation Permissions
Kantale
[edit] Code Permissions
Kantale
[edit] Unit Tests Permissions
Kantale