Convert beagle to VCF user Kantale

From PyPedia
Jump to: navigation, search

Contents

[edit] Documentation

Convert from beagle to Variant Call Format


[edit] Parameters

<inputs>
</inputs>


[edit] Return

[edit] See also

[edit] Code

#Two Columns per sample
def Convert_beagle_to_VCF_user_Kantale(
	input_beagle_filename = None,
	input_markers_filename = None,
	output_VCF_filename = None,
	chromosome = None,
	phased = False,
):

	input_beagle_file = open(input_beagle_filename)

	str_sep = "|" if phased else "/"

#Read markers file
	markers = {}
	for x in open(input_markers_filename).readlines():
		xs = x.replace("\n", "").split()
		markers[xs[0]] = xs[1:]

	output_VCF_file = open(output_VCF_filename, "w")
	
	sample_names = None
	lines = 0

	while True:
		beagle_line = input_beagle_file.readline()
		if not beagle_line: break
		lines += 1

		if lines % 10000 == 0: print "lines:", lines
	
		beagle_line_splitted = beagle_line.replace("\n", "").split()
		
		if not sample_names:
			if beagle_line_splitted[0] == "I":
				sample_names = [x for index, x in enumerate(beagle_line_splitted[2:]) if index % 2 == 1]
				header, header_str = Default_VCF_header_user_Kantale(sample_names = sample_names)
				output_VCF_file.write(header_str)
				continue


		if beagle_line_splitted[0] == "M":

			genotypes_1 = [x for index, x in enumerate(beagle_line_splitted[2:]) if x != "0" and index % 2 == 1]
			genotypes_2 = [x for index, x in enumerate(beagle_line_splitted[2:]) if x != "0" and index % 2 == 0]
			allele1 = [x for x in genotypes_1 if x != "0"]
			if len(allele1) != 0:
				allele1 = allele1[0]
			else:
				print lines
				print "No allele1 info:", genotypes_1, genotypes_2
				continue
			allele2 = [x for x in genotypes_1 if x != allele1]
			if len(allele2) != 0:
				allele2 = allele2[0]
			else:
				allele2 = [x for x in genotypes_2 if x != allele1]
				if len(allele2) != 0:
					allele2 = allele2[0]
				else:
					allele2 = None
					#print lines
					#print beagle_line_splitted				
					#raise Exception("oups")

			if allele2:
				maf, minor_allele, obs = Minor_allele_frequency_user_Kantale(
					allele1 = allele1,
					allele2 = allele2,
					observations_chr1 = genotypes_1,
					observations_chr2 = genotypes_2)
				if minor_allele == allele1: major_allele = allele2
				elif minor_allele == allele2: major_allele = allele1
				else:
					raise Exception("WTF!")
			else:
				minor_allele = None
				major_allele = allele2

			if markers.has_key(beagle_line_splitted[1]):
				mf = markers[beagle_line_splitted[1]]
				a1 = mf[1]
				a2 = mf[2]
				to_print = [chromosome, mf[0], beagle_line_splitted[1], a1, a2, ".", ".",".","GT"]
				for i in xrange(len(beagle_line_splitted[2:])/2):
					current = []
					for g in beagle_line_splitted[2+i:4+i]:
						if g == a1: current += "0"
						elif g == a2: current += "1"
						elif g == "0": current += "."
						else:
							raise Exception("Ehm...")
					to_print += [str.join(str_sep, current)]
				output_VCF_file.write(str.join("\t", to_print) + "\n")
			else:
				print lines
				print "Could not find: " + beagle_line_splitted[1]


	input_beagle_file.close()
	output_VCF_file.close()

[edit] Unit Tests

def uni1():
	return True

[edit] Development Code

def Convert_beagle_to_VCF_user_Kantale():
	pass

[edit] Permissions

[edit] Documentation Permissions

Kantale

[edit] Code Permissions

Kantale

[edit] Unit Tests Permissions

Kantale

[edit] Permissions Permissions

Kantale

Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox