Convert TPED to VCF user Kantale
From PyPedia
Contents |
[edit] Documentation
Documentation for Convert TPED to VCF
[edit] Parameters
<inputs> <param name="input_tped_filename" type="data" value="" label="input TPED filename: "/> <param name="input_tfam_filename" type="data" value="" label="input TFAM (or PED) filename: "/> <param name="output_vcf_filename" type="data" value="" label="output VCF filename: "/> <param name="verbose" type="eval" value="True" label="Verbose: "/> </inputs>
[edit] Return
[edit] See also
Convert_PEDMAP_to_VCF_user_Kantale
[edit] Code
def Convert_TPED_to_VCF_user_Kantale( input_tped_filename = None, input_tfam_filename = None, #You can either have a PED file here output_vcf_filename = None, verbose = False, ): #Get sample names from ped file print "Reading sample names.." sample_names = [] input_tfam_file = open(input_tfam_filename) while True: line = input_tfam_file.readline() if not line: break sample_names += [line.replace("\n", "").split()[1]] sample_names_length = len(sample_names) if sample_names_length % 100 == 0: print "Samples:", sample_names_length, Print_now_user_Kantale() input_tfam_file.close() print "..DONE" header, header_str = Default_VCF_header_user_Kantale(header = None, sample_names = sample_names) output_vcf_file = open(output_vcf_filename, "w") output_vcf_file.write(header_str) line_no = 0 filtered = 0 non_SNPs = 0 input_tped_file = open(input_tped_filename) print "Converting to VCF.." while True: line = input_tped_file.readline() if not line: break line_no += 1 if line_no % 10000 == 0: print "Lines:", line_no, Print_now_user_Kantale() line_splitted = line.replace("\n", "").split() chromosome = line_splitted[0] to_print = [chromosome, line_splitted[3], line_splitted[1]] al1, al2 = (None, None) observations_chr1 = [] observations_chr2 = [] al1 = None for gen in [(line_splitted[4+x], line_splitted[5+x]) for x in range(0,2*sample_names_length,2)]: if not al1 and gen[0] != "0": al1 = gen[0] if not al1 and gen[1] != "0": al1 = gen[1] if not al1: continue if al1 != gen[0] and gen[0] != "0": al2 = gen[0] if al1 != gen[1] and gen[1] != "0": al2 = gen[1] if gen[0] != "0": observations_chr1 += [gen[0]] if gen[1] != "0": observations_chr2 += [gen[1]] if al1 not in ["A", "C", "G", "T"]: non_SNPs += 1 print "Warning non SNP found: ", al1, al2, " in line:", line_no continue if al1 and al2: (maf, minor_allele, obs) = Minor_allele_frequency_user_Kantale( allele1 = al1, allele2 = al2, observations_chr1 = observations_chr1, observations_chr2 = observations_chr2 ) alternative = minor_allele reference = al1 if minor_allele == al2 else al2 else: if al1: reference = al1 alternative = "." elif al2: reference = al2 alternative = "." else: if verbose: print "warning: Cannot infer reference/alternatice information for SNP:", line_splitted[1] print line_splitted[4:] filtered += 1 continue to_print += [ reference, alternative, "100", "PASS", "NS=" + str(sample_names_length), "GT"] for gen in [(line_splitted[4+x], line_splitted[5+x]) for x in range(0,2*sample_names_length,2)]: if gen[0] == reference: genotype = "0" elif gen[0] == alternative: genotype = "1" elif gen[0] == "0": genotype = "." else: print gen print "reference:", reference print "alternative:", alternative raise Exception("exmm..") if gen[1] == reference: genotype += "/0" elif gen[1] == alternative: genotype += "/1" elif gen[1] == "0": genotype += "/." else: print "genotype:", gen print "reference:", reference print "alternative:", alternative raise Exception("exmm..") to_print += [genotype] output_vcf_file.write(str.join("\t", to_print) + "\n") input_tped_file.close() output_vcf_file.close() print "Filtered out SNPs: ", filtered print "Non SNPs found: ", non_SNPs print "Output VCF filename: ", output_vcf_filename
[edit] Unit Tests
def uni1(): return True
[edit] Development Code
def Convert_TPED_to_VCF_user_Kantale(): pass
[edit] Permissions
[edit] Documentation Permissions
Kantale
[edit] Code Permissions
Kantale
[edit] Unit Tests Permissions
Kantale