Convert TPED to VCF user Kantale

From PyPedia
Jump to: navigation, search


Contents

[edit] Documentation

Documentation for Convert TPED to VCF

[edit] Parameters

input TPED filename:
input TFAM (or PED) filename:
output VCF filename:
Verbose:

<inputs>
<param name="input_tped_filename" type="data" value="" label="input TPED filename: "/>
<param name="input_tfam_filename" type="data" value="" label="input TFAM (or PED) filename: "/>
<param name="output_vcf_filename" type="data" value="" label="output VCF filename: "/>
<param name="verbose" type="eval" value="True" label="Verbose: "/>
</inputs>


[edit] Return

[edit] See also

Convert_PEDMAP_to_VCF_user_Kantale

[edit] Code

def Convert_TPED_to_VCF_user_Kantale(
	input_tped_filename = None,
	input_tfam_filename = None, #You can either have a PED file here
	output_vcf_filename = None,
	verbose = False,
	):
	
	#Get sample names from ped file
	print "Reading sample names.."
	sample_names = []
	input_tfam_file = open(input_tfam_filename)
	while True:
		line = input_tfam_file.readline()
		if not line:
			break
		
		sample_names += [line.replace("\n", "").split()[1]]
		sample_names_length = len(sample_names)
		if sample_names_length % 100 == 0:
			print "Samples:", sample_names_length, Print_now_user_Kantale()
	input_tfam_file.close()
	print "..DONE"
	
	header, header_str = Default_VCF_header_user_Kantale(header = None, sample_names = sample_names)
	
	output_vcf_file = open(output_vcf_filename, "w")
	output_vcf_file.write(header_str)
	
	line_no = 0
	filtered = 0
	non_SNPs = 0
	input_tped_file = open(input_tped_filename)
	print "Converting to VCF.."
	while True:
		line = input_tped_file.readline()
		if not line:
			break
		
		line_no += 1
		if line_no % 10000 == 0: 
			print "Lines:", line_no, Print_now_user_Kantale()
		
		line_splitted = line.replace("\n", "").split()
		chromosome = line_splitted[0]

		to_print = [chromosome, line_splitted[3], line_splitted[1]]
		
		al1, al2 = (None, None)
		observations_chr1 = []
		observations_chr2 = []
		al1 = None
		for gen in [(line_splitted[4+x], line_splitted[5+x]) for x in range(0,2*sample_names_length,2)]:
			if not al1 and gen[0] != "0": al1 = gen[0]
			if not al1 and gen[1] != "0": al1 = gen[1]

			if not al1: continue

			if al1 != gen[0] and gen[0] != "0": al2 = gen[0]
			if al1 != gen[1] and gen[1] != "0": al2 = gen[1]

			if gen[0] != "0": observations_chr1 += [gen[0]]
			if gen[1] != "0": observations_chr2 += [gen[1]]

		if al1 not in ["A", "C", "G", "T"]:
			non_SNPs += 1
			print "Warning non SNP found: ", al1, al2, " in line:", line_no
			continue

		if al1 and al2:
			(maf, minor_allele, obs) = Minor_allele_frequency_user_Kantale(
						allele1 = al1,
						allele2 = al2,
						observations_chr1 = observations_chr1,
						observations_chr2 = observations_chr2
									)
			alternative = minor_allele
			reference = al1 if minor_allele == al2 else al2
		else:
			if al1:
				reference = al1
				alternative = "."
			elif al2:
				reference = al2
				alternative = "."
			else:
				if verbose:
					print "warning: Cannot infer reference/alternatice information for SNP:", line_splitted[1]
					print line_splitted[4:]
				filtered += 1
				continue

		to_print += [ reference, alternative, "100", "PASS", "NS=" + str(sample_names_length), "GT"]
		for gen in [(line_splitted[4+x], line_splitted[5+x]) for x in range(0,2*sample_names_length,2)]:
			if gen[0] == reference: genotype = "0"
			elif gen[0] == alternative: genotype = "1"
			elif gen[0] == "0": genotype = "."
			else:
				print gen
				print "reference:", reference
				print "alternative:", alternative
				raise Exception("exmm..")
			if gen[1] == reference: genotype += "/0"
			elif gen[1] == alternative: genotype += "/1"
			elif gen[1] == "0": genotype += "/."
			else:
				print "genotype:", gen
				print "reference:", reference
				print "alternative:", alternative
				raise Exception("exmm..")

			to_print += [genotype]

		output_vcf_file.write(str.join("\t", to_print) + "\n")
		
	input_tped_file.close()
	output_vcf_file.close()
	
	print "Filtered out SNPs: ", filtered
	print "Non SNPs found: ", non_SNPs
	print "Output VCF filename: ", output_vcf_filename

[edit] Unit Tests

def uni1():
	return True

[edit] Development Code

def Convert_TPED_to_VCF_user_Kantale():
	pass

[edit] Permissions

[edit] Documentation Permissions

Kantale

[edit] Code Permissions

Kantale

[edit] Unit Tests Permissions

Kantale

[edit] Permissions Permissions

Kantale

Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox