Impute user Kantale

From PyPedia
Revision as of 23:16, 23 March 2012 by Kantale (Talk)

Jump to: navigation, search

Contents

Documentation

Documentation for Impute user Kantale

Parameters

<inputs>
</inputs>

Return

See also

Code

class Impute_user_Kantale():

	def __init__(self, constants):
		self.constants = constants
		self.plink = self.constants["path_to_plink"]
		self.it = self.constants["imputationtool_path"]
		self.gtool = self.constants["path_to_gtool"]
		self.pbsStats = self.constants["pbsStats"]
		self.i2 = self.constants["path_to_impute2"]
		self.lengths_b37 = Get_length_of_chromosomes_build37_user_Kantale()
		self.lengths_b36 = Get_length_of_chromosomes_build36_user_Kantale()
		self.genetic_map_b37 = self.constants["path_to_genetic_map_b37"]

		self.study_fn = None
		self.study_dir = None
		self.study = None
		self.pwd = os.getcwd()
		self.reference_TT = None
		self.reference = None
		self.chromosomes = [str(x) for x in range(1,23)]
		self.study = False
		self.study_sample_filter = None
		self.release = "b37"
		self.software = "impute2"
		self.validation_fn = None
		self.validation_dir = None
		self.validation = None
		self.validation_type = None

	def set_study_fn(self, study_fn):
		self.study_fn = study_fn
		print "Study filename:", self.study_fn

		self.study_dir = os.path.split(self.study_fn)[0]
		print "Study directory:", self.study_dir

		self.study = os.path.splitext(os.path.split(self.study_fn)[1])[0]
		print "Study name:", self.study

		self.study_type = os.path.splitext(self.study_fn)[1].replace(".", "")
		print "Study type:", self.study_type

	def set_validation_fn(self, validation_fn):
		self.validation_fn = validation_fn
		self.validation_dir = os.path.split(self.validation_fn)[0]
		self.validation = os.path.splitext(os.path.split(self.validation_fn)[1])[0]
		self.validation_type = os.path.splitext(self.validation_fn)[1].replace(".", "")

	def set_reference_TT(self, reference_TT):
		self.reference_TT = reference_TT
		self.reference = os.path.split(os.path.split(self.reference_TT + "/")[0])[1]

	def set_temp_dir(self, temp_dir):
		self.temp_dir = temp_dir
		os.system("mkdir -p " + self.temp_dir)

	def set_study_sample_filter(self, study_sample_filter):
		self.study_sample_filter = study_sample_filter

	def get_reference_samples(self,):
		return len([True for y in [x.replace("\n", "").split() for x in open(os.path.join(self.reference_TT, "PhenotypeInformation.txt")).readlines()] if y[2] == "include"])

	def apply_reference_TT_sample_filter(self, filter):
		open(os.path.join(self.reference_TT,"PhenotypeInformation.txt2"), "w").write(str.join("\n", [str.join("\t", [y[0], y[1], "include" if filter(y[0]) else "exclude", y[3]]) for y in [x.replace("\n", "").split() for x in open(os.path.join(self.reference_TT, "PhenotypeInformation.txt")).readlines()]]))

		command = "cp " + os.path.join(self.reference_TT,"PhenotypeInformation.txt") + " " + os.path.join(self.reference_TT,"PhenotypeInformation.txt.pypBackup")
		print command
		os.system(command)

		command = "cp " + os.path.join(self.reference_TT,"PhenotypeInformation.txt2") + " " + os.path.join(self.reference_TT,"PhenotypeInformation.txt")
		print command
		os.system(command)

	def convert_BED_PEDMAP(self,):
		commands = []
		commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.study + "_PEDMAP")])]
		commands += [concat(["rm", os.path.join(self.temp_dir, self.study + "_PEDMAP") + "/*"])]

		keep = ["--keep", self.study_sample_filter] if self.study_sample_filter else []

		commands += [concat([self.plink, "--bfile", os.path.join(self.study_dir, self.study)] + keep +  ["--noweb", "--recode", "--out", os.path.join(self.temp_dir, self.study + "_PEDMAP/study")])]

		return [{
			"name" : "convert_BED_PEDMAP",
			"command" : "Execute_commands_user_Kantale",
			"params" : {"commands" : commands,},
			"prereq" : [],
			"walltime": "100:00:00",
			"mem" : "8GB",
			}]

	def remove_rs_from_PEDMAP(self,):
		p = []

		p += [{
			"name" : "Edit_study_PEDMAP_fixm9",
			"command" : "Edit_PEDMAP_user_Kantale", 
			"params" : {
				"input_PED_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.ped"),
				"input_MAP_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.map"),
				"output_PED_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.fix_m9.ped"),
				"output_MAP_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.fix_m9.map"),
				"phenotype_function" : 'lambda a,b,c,d,e,f : "1"',
				"rs_function" : 'lambda a,b,c,d : a + "-" + d',
				},
			"prereq" : [],
			"walltime" : "100:00:00",
			"mem" : "8GB",
			}]

		commands = []
		commands += [concat(["mv", os.path.join(self.temp_dir, self.study + "_PEDMAP/study.ped"), os.path.join(self.temp_dir, self.study + "_PEDMAP/study.ped.backup")])]
		commands += [concat(["mv", os.path.join(self.temp_dir, self.study + "_PEDMAP/study.map"), os.path.join(self.temp_dir, self.study + "_PEDMAP/study.map.backup")])]

		p += [{
			"name" : "remove_rs_from_PEDMAP",
			"command" : "Execute_commands_user_Kantale",
			"params" : {
				"commands" : commands
				},
			"prereq" : ["Edit_study_PEDMAP_fixm9"],
			"walltime" : "1:00:00",
			"mem" : "1GB",
			}]
		
		return p

	def convert_PEDMAP_TT(self,):
		commands = []
		commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.study + "_TT")])]
		commands += [concat(["rm", os.path.join(self.temp_dir, self.study + "_TT") + "/*"])]
		commands += [concat(["java", "-jar", self.it, "--mode", "pmtt", "--in", os.path.join(self.temp_dir, self.study + "_PEDMAP"), "--out", os.path.join(self.temp_dir, self.study + "_TT")])]

		return [{
			"name" : "convert_PEDMAP_TT",
			"command" : "Execute_commands_user_Kantale",
			"params" : {"commands" : commands},
			"prereq" : [],
			"walltime" : "1000:00:00",
			"mem" : "8GB",
			}]

	def compare_TT(self,):
		commands = []

		commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference)])]
		commands += [concat(["rm", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/*"])]
		commands += [concat(["java", "-Xmx30g", "-jar", self.it, "--mode", "ttpmh", "--in", os.path.join(self.temp_dir, self.study + "_TT") + "/", "--hap", self.reference_TT + "/", "--out", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference + "/")])]

		return [{
			"name" : "compare_TT",
			"command" : "Execute_commands_user_Kantale",
			"params" : {"commands" : commands},
			"prereq" : [],
			"walltime" : "1000:00:00",
			"mem" : "32GB",
			}]

	def convert_compared_gen_sample(self,):
		
		p = [{
			"name" : "convert_compared_gen_sample_begin",
			"command" : "No_op_user_Kantale",
			"params" : {},
			"prereq" : [],
			"walltime" : "00:10:00",
			"mem" : "1GB",
		}]

		for chromosome in self.chromosomes:
			p += [{
				"name" : "convert_compared_gen_sample_chr" + chromosome,
				"command" : "Execute_command_user_Kantale",
				"params" : {
					"command" : concat([self.gtool, "-P", "--ped", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".ped", "--map", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".map", "--og", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".gen", "--os", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".sample"]),
					},
				"prereq" : ["convert_compared_gen_sample_begin"],
				"walltime" : "100:00:00",
				"mem" : "2GB",
			}]

		p += [{
			"name" : "convert_compared_gen_sample",
			"command" : "No_op_user_Kantale",
			"params" : {},
			"prereq" : ["convert_compared_gen_sample_chr" + chromosome for chromosome in self.chromosomes],
			"walltime" : "00:10:00",
			"mem" : "1GB",
		}]

		return p

	def convert_reference_TT_PEDMAP_hap_legend(self,):

		commands = []
		commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.reference + "_PEDMAP")])]
		commands += [concat(["rm",  os.path.join(self.temp_dir, self.reference + "_PEDMAP/*")])]
		commands += [concat(["java", "-jar", self.it, "--mode", "ttpm", "--in", self.reference_TT + "/", "--out", os.path.join(self.temp_dir, self.reference + "_PEDMAP/")])]

		p = [{
			"name" : "convert_reference_TT_PEDMAP_hap_legend_begin",
			"command" : "Execute_commands_user_Kantale",
			"params" : {"commands" : commands},
			"prereq" : [],
			"mem" : "8GB",
			"walltime" : "100:00:00",
		}]

		for chromosome in self.chromosomes:
			p += [{
				"name" : "convert_reference_TT_PEDMAP_hap_legend_transpose_chr" + chromosome,
				"command" : "Execute_commands_user_Kantale",
				"params" : {
					"commands" : [concat([self.plink, "--file", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output"), "--chr", chromosome, "--noweb", "--transpose", "--recode", "--out", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome)])]
					},
				"prereq" : ["convert_reference_TT_PEDMAP_hap_legend_begin"],
				"walltime" : "100:00:00",
				"mem" : "8GB",
				}]

			p += [{
				"name" : "convert_reference_TT_PEDMAP_hap_legend_chr" + chromosome,
				"command" : "Convert_TPED_to_impute2_hap_legend_user_Kantale",
				"params" : {
					"input_TPED_filename"    : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".tped"),
					"input_TFAM_filename"    : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".tfam"),
					"output_hap_filename"    : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".hap"),
					"output_legend_filename" : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".legend"),
					"missing_value" : "?",
					},
				"prereq" : ["convert_reference_TT_PEDMAP_hap_legend_transpose_chr" + chromosome],
				"walltime" : "100:00:00",
				"mem" : "8GB",
				}]

		p += [{
			"name" : "convert_reference_TT_PEDMAP_hap_legend",
			"command" : "No_op_user_Kantale",
			"params" : {},
			"prereq" : ["convert_reference_TT_PEDMAP_hap_legend_chr" + chromosome for chromosome in self.chromosomes],
			"walltime" : "00:10:00",
			"mem" : "1GB",
			}]

		return p

	def impute_i2(self,):

		if self.release == "b37":
			lengths = self.lengths_b37
			genetic_map = self.genetic_map_b37
		elif self.release == "b36":
			lengths = self.lengths_b36
			genetic_map = self.genetiv_map_b36
		else:
			raise Exception("Invalid release:", self.release, " valid options: b36, b37")

		p = [{
			"name" : "impute_i2_begin",
			"command" : "Execute_commands_user_Kantale",
			"params" : {
				"commands" : [
						concat(["mkdir", "-p", os.path.join(self.temp_dir, "RESULTS")]),
						concat(["rm", os.path.join(self.temp_dir, "RESULTS/*")]),
					],

				},
			"prereq" : [],
			"mem" : "1GB",
			"walltime" : "00:10:00",
		}]

		prereq = []
		self.i2_outputs = {}

		for chromosome in self.chromosomes:
			self.i2_outputs[chromosome] = []
			for _from in range(1, lengths[chromosome], 5000000):
				to = _from + 5000000 - 1
				p += [{
					"name" : "impute2_chr_" + chromosome + "_" + str(_from) + "_" + str(to),
					"command" : "Execute_commands_user_Kantale",
					"params" : {
						"commands" : [
								concat([
									self.i2, 
									"-h", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".hap"),
									"-l", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".legend"),
									"-m", genetic_map.replace("@CHR@", chromosome),
									"-g", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".gen",
									"-int", str(_from), str(to),
									"-Ne", "20000",
									"-o", os.path.join(self.temp_dir, "RESULTS/output_chr_"+ chromosome + "_" + str(_from) + "_" + str(to)),
								]),
								],
						},
					"prereq" : ["impute_i2_begin"],
					"walltime" : "1000:00:00",
					"mem" : "8GB",
					}]

				prereq += ["impute2_chr_" + chromosome + "_" + str(_from) + "_" + str(to)]
				self.i2_outputs[chromosome] += [os.path.join(self.temp_dir, "RESULTS/output_chr_"+ chromosome + "_" + str(_from) + "_" + str(to))]

		p += [{
			"name" : "impute_i2",
			"command" : "No_op_user_Kantale",
			"params" : {},
			"prereq" : prereq,
			"walltime" : "00:10:00",
			"mem" : "1GB",
		}]

		return p

	def process_i2_results(self,):
		commands = []
		for chromosome in self.chromosomes:
			commands += [concat(["cat ", str.join(" ", self.i2_outputs[chromosome]), " > ", os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) ])]
			commands += [concat(["cat ", str.join(" ", [x + "_info" for x in self.i2_outputs[chromosome]]), " > ", os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome + "_info") ])]
			
		p = [{
			"name" : "process_i2_results_begin",
			"command" : "Execute_commands_user_Kantale",
			"params" : {"commands" : commands},
			"prereq" : [],
			"walltime" : "24:00:00",
			"mem" : "8GB",
			}]
		
		for chromosome in self.chromosomes:
			p += [{
				"name" : "process_i2_results_chr" + chromosome,
				"command" : "Convert_impute2_gprobs_to_PEDMAP_beagle_user_Kantale",
				"params" : {
					"input_impute2_gprobs_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome),
					"input_impute2_info_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome + "_info"),
					"output_TPED_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) + ".tped",
					"output_TFAM_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) + ".tfam",
					"output_stats_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) + ".stats",
					"chromosome" : chromosome,
					},
				"prereq" : ["process_i2_results_begin"],
				"walltime" : "100:00:00",
				"mem" : "8GB",
			}]
		
		p += [{
			"name" : "process_i2_results",
			"command" : "No_op_user_Kantale",
			"params" : {},
			"prereq" : ["process_i2_results_chr" + chromosome for x in self.chromosomes],
			"walltime" : "00:10:00",
			"mem" : "1GB",
			}]
			
		return p

	def add_step(self, command):
		
		p = command()
			
		if len(self.pipeline) > 0: 
			p[0]["prereq"] = [self.pipeline[-1]["name"]]

		self.pipeline += p

	def do_impute(self):
		self.pipeline = []
		if self.study_type == "bed":
			self.add_step(self.convert_BED_PEDMAP)
		else:
			raise Exception("Unknown study type:", self.study_type)


		self.add_step(self.remove_rs_from_PEDMAP)
		self.add_step(self.convert_PEDMAP_TT)
		self.add_step(self.compare_TT)
		self.add_step(self.convert_compared_gen_sample)
		self.add_step(self.convert_reference_TT_PEDMAP_hap_legend)

		if self.software == "impute2":
			self.add_step(self.impute_i2)
			self.add_step(self.process_i2_results)
		else:
			raise Exception("Unknown software:", self.software, " valid options: impute2")

		Run_pipeline_user_Kantale(
			pipeline = self.pipeline,
			pbsStats = self.pbsStats,
			temp_dir = os.path.join(self.temp_dir, "PBS"),
			delay = 5,
#			dummy = True,
			verbose = True,
		)		

def concat(params):
	return str.join(" ", params)

Unit Tests

def uni1():
	return True

Development Code

def Impute_user_Kantale():
	pass

Permissions

Documentation Permissions

Kantale

Code Permissions

Kantale

Unit Tests Permissions

Kantale

Permissions Permissions

Kantale

Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox