Impute user Kantale
From PyPedia
Contents |
Documentation
Documentation for Impute user Kantale
Parameters
<inputs> </inputs>
Return
See also
Code
class Impute_user_Kantale(): def __init__(self, constants): self.constants = constants self.plink = self.constants["path_to_plink"] self.it = self.constants["imputationtool_path"] self.gtool = self.constants["path_to_gtool"] self.pbsStats = self.constants["pbsStats"] self.i2 = self.constants["path_to_impute2"] self.lengths_b37 = Get_length_of_chromosomes_build37_user_Kantale() self.lengths_b36 = Get_length_of_chromosomes_build36_user_Kantale() self.genetic_map_b37 = self.constants["path_to_genetic_map_b37"] self.study_fn = None self.study_dir = None self.study = None self.pwd = os.getcwd() self.reference_TT = None self.reference = None self.chromosomes = [str(x) for x in range(1,23)] self.study = False self.study_sample_filter = None self.release = "b37" self.software = "impute2" self.validation_fn = None self.validation_dir = None self.validation = None self.validation_type = None def set_study_fn(self, study_fn): self.study_fn = study_fn print "Study filename:", self.study_fn self.study_dir = os.path.split(self.study_fn)[0] print "Study directory:", self.study_dir self.study = os.path.splitext(os.path.split(self.study_fn)[1])[0] print "Study name:", self.study self.study_type = os.path.splitext(self.study_fn)[1].replace(".", "") print "Study type:", self.study_type def set_validation_fn(self, validation_fn): self.validation_fn = validation_fn self.validation_dir = os.path.split(self.validation_fn)[0] self.validation = os.path.splitext(os.path.split(self.validation_fn)[1])[0] self.validation_type = os.path.splitext(self.validation_fn)[1].replace(".", "") def set_reference_TT(self, reference_TT): self.reference_TT = reference_TT self.reference = os.path.split(os.path.split(self.reference_TT + "/")[0])[1] def set_temp_dir(self, temp_dir): self.temp_dir = temp_dir os.system("mkdir -p " + self.temp_dir) def set_study_sample_filter(self, study_sample_filter): self.study_sample_filter = study_sample_filter def get_reference_samples(self,): return len([True for y in [x.replace("\n", "").split() for x in open(os.path.join(self.reference_TT, "PhenotypeInformation.txt")).readlines()] if y[2] == "include"]) def apply_reference_TT_sample_filter(self, filter): open(os.path.join(self.reference_TT,"PhenotypeInformation.txt2"), "w").write(str.join("\n", [str.join("\t", [y[0], y[1], "include" if filter(y[0]) else "exclude", y[3]]) for y in [x.replace("\n", "").split() for x in open(os.path.join(self.reference_TT, "PhenotypeInformation.txt")).readlines()]])) command = "cp " + os.path.join(self.reference_TT,"PhenotypeInformation.txt") + " " + os.path.join(self.reference_TT,"PhenotypeInformation.txt.pypBackup") print command os.system(command) command = "cp " + os.path.join(self.reference_TT,"PhenotypeInformation.txt2") + " " + os.path.join(self.reference_TT,"PhenotypeInformation.txt") print command os.system(command) def convert_BED_PEDMAP(self,): commands = [] commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.study + "_PEDMAP")])] commands += [concat(["rm", os.path.join(self.temp_dir, self.study + "_PEDMAP") + "/*"])] keep = ["--keep", self.study_sample_filter] if self.study_sample_filter else [] commands += [concat([self.plink, "--bfile", os.path.join(self.study_dir, self.study)] + keep + ["--noweb", "--recode", "--out", os.path.join(self.temp_dir, self.study + "_PEDMAP/study")])] return [{ "name" : "convert_BED_PEDMAP", "command" : "Execute_commands_user_Kantale", "params" : {"commands" : commands,}, "prereq" : [], "walltime": "100:00:00", "mem" : "8GB", }] def remove_rs_from_PEDMAP(self,): p = [] p += [{ "name" : "Edit_study_PEDMAP_fixm9", "command" : "Edit_PEDMAP_user_Kantale", "params" : { "input_PED_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.ped"), "input_MAP_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.map"), "output_PED_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.fix_m9.ped"), "output_MAP_filename" : os.path.join(self.temp_dir, self.study + "_PEDMAP/study.fix_m9.map"), "phenotype_function" : 'lambda a,b,c,d,e,f : "1"', "rs_function" : 'lambda a,b,c,d : a + "-" + d', }, "prereq" : [], "walltime" : "100:00:00", "mem" : "8GB", }] commands = [] commands += [concat(["mv", os.path.join(self.temp_dir, self.study + "_PEDMAP/study.ped"), os.path.join(self.temp_dir, self.study + "_PEDMAP/study.ped.backup")])] commands += [concat(["mv", os.path.join(self.temp_dir, self.study + "_PEDMAP/study.map"), os.path.join(self.temp_dir, self.study + "_PEDMAP/study.map.backup")])] p += [{ "name" : "remove_rs_from_PEDMAP", "command" : "Execute_commands_user_Kantale", "params" : { "commands" : commands }, "prereq" : ["Edit_study_PEDMAP_fixm9"], "walltime" : "1:00:00", "mem" : "1GB", }] return p def convert_PEDMAP_TT(self,): commands = [] commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.study + "_TT")])] commands += [concat(["rm", os.path.join(self.temp_dir, self.study + "_TT") + "/*"])] commands += [concat(["java", "-jar", self.it, "--mode", "pmtt", "--in", os.path.join(self.temp_dir, self.study + "_PEDMAP"), "--out", os.path.join(self.temp_dir, self.study + "_TT")])] return [{ "name" : "convert_PEDMAP_TT", "command" : "Execute_commands_user_Kantale", "params" : {"commands" : commands}, "prereq" : [], "walltime" : "1000:00:00", "mem" : "8GB", }] def compare_TT(self,): commands = [] commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference)])] commands += [concat(["rm", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/*"])] commands += [concat(["java", "-Xmx30g", "-jar", self.it, "--mode", "ttpmh", "--in", os.path.join(self.temp_dir, self.study + "_TT") + "/", "--hap", self.reference_TT + "/", "--out", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference + "/")])] return [{ "name" : "compare_TT", "command" : "Execute_commands_user_Kantale", "params" : {"commands" : commands}, "prereq" : [], "walltime" : "1000:00:00", "mem" : "32GB", }] def convert_compared_gen_sample(self,): p = [{ "name" : "convert_compared_gen_sample_begin", "command" : "No_op_user_Kantale", "params" : {}, "prereq" : [], "walltime" : "00:10:00", "mem" : "1GB", }] for chromosome in self.chromosomes: p += [{ "name" : "convert_compared_gen_sample_chr" + chromosome, "command" : "Execute_command_user_Kantale", "params" : { "command" : concat([self.gtool, "-P", "--ped", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".ped", "--map", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".map", "--og", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".gen", "--os", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".sample"]), }, "prereq" : ["convert_compared_gen_sample_begin"], "walltime" : "100:00:00", "mem" : "2GB", }] p += [{ "name" : "convert_compared_gen_sample", "command" : "No_op_user_Kantale", "params" : {}, "prereq" : ["convert_compared_gen_sample_chr" + chromosome for chromosome in self.chromosomes], "walltime" : "00:10:00", "mem" : "1GB", }] return p def convert_reference_TT_PEDMAP_hap_legend(self,): commands = [] commands += [concat(["mkdir", "-p", os.path.join(self.temp_dir, self.reference + "_PEDMAP")])] commands += [concat(["rm", os.path.join(self.temp_dir, self.reference + "_PEDMAP/*")])] commands += [concat(["java", "-jar", self.it, "--mode", "ttpm", "--in", self.reference_TT + "/", "--out", os.path.join(self.temp_dir, self.reference + "_PEDMAP/")])] p = [{ "name" : "convert_reference_TT_PEDMAP_hap_legend_begin", "command" : "Execute_commands_user_Kantale", "params" : {"commands" : commands}, "prereq" : [], "mem" : "8GB", "walltime" : "100:00:00", }] for chromosome in self.chromosomes: p += [{ "name" : "convert_reference_TT_PEDMAP_hap_legend_transpose_chr" + chromosome, "command" : "Execute_commands_user_Kantale", "params" : { "commands" : [concat([self.plink, "--file", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output"), "--chr", chromosome, "--noweb", "--transpose", "--recode", "--out", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome)])] }, "prereq" : ["convert_reference_TT_PEDMAP_hap_legend_begin"], "walltime" : "100:00:00", "mem" : "8GB", }] p += [{ "name" : "convert_reference_TT_PEDMAP_hap_legend_chr" + chromosome, "command" : "Convert_TPED_to_impute2_hap_legend_user_Kantale", "params" : { "input_TPED_filename" : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".tped"), "input_TFAM_filename" : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".tfam"), "output_hap_filename" : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".hap"), "output_legend_filename" : os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".legend"), "missing_value" : "?", }, "prereq" : ["convert_reference_TT_PEDMAP_hap_legend_transpose_chr" + chromosome], "walltime" : "100:00:00", "mem" : "8GB", }] p += [{ "name" : "convert_reference_TT_PEDMAP_hap_legend", "command" : "No_op_user_Kantale", "params" : {}, "prereq" : ["convert_reference_TT_PEDMAP_hap_legend_chr" + chromosome for chromosome in self.chromosomes], "walltime" : "00:10:00", "mem" : "1GB", }] return p def impute_i2(self,): if self.release == "b37": lengths = self.lengths_b37 genetic_map = self.genetic_map_b37 elif self.release == "b36": lengths = self.lengths_b36 genetic_map = self.genetiv_map_b36 else: raise Exception("Invalid release:", self.release, " valid options: b36, b37") p = [{ "name" : "impute_i2_begin", "command" : "Execute_commands_user_Kantale", "params" : { "commands" : [ concat(["mkdir", "-p", os.path.join(self.temp_dir, "RESULTS")]), concat(["rm", os.path.join(self.temp_dir, "RESULTS/*")]), ], }, "prereq" : [], "mem" : "1GB", "walltime" : "00:10:00", }] prereq = [] self.i2_outputs = {} for chromosome in self.chromosomes: self.i2_outputs[chromosome] = [] for _from in range(1, lengths[chromosome], 5000000): to = _from + 5000000 - 1 p += [{ "name" : "impute2_chr_" + chromosome + "_" + str(_from) + "_" + str(to), "command" : "Execute_commands_user_Kantale", "params" : { "commands" : [ concat([ self.i2, "-h", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".hap"), "-l", os.path.join(self.temp_dir, self.reference + "_PEDMAP/output_chr" + chromosome + ".legend"), "-m", genetic_map.replace("@CHR@", chromosome), "-g", os.path.join(self.temp_dir, self.study + "_COMPARE_" + self.reference) + "/chr" + chromosome + ".gen", "-int", str(_from), str(to), "-Ne", "20000", "-o", os.path.join(self.temp_dir, "RESULTS/output_chr_"+ chromosome + "_" + str(_from) + "_" + str(to)), ]), ], }, "prereq" : ["impute_i2_begin"], "walltime" : "1000:00:00", "mem" : "8GB", }] prereq += ["impute2_chr_" + chromosome + "_" + str(_from) + "_" + str(to)] self.i2_outputs[chromosome] += [os.path.join(self.temp_dir, "RESULTS/output_chr_"+ chromosome + "_" + str(_from) + "_" + str(to))] p += [{ "name" : "impute_i2", "command" : "No_op_user_Kantale", "params" : {}, "prereq" : prereq, "walltime" : "00:10:00", "mem" : "1GB", }] return p def process_i2_results(self,): commands = [] for chromosome in self.chromosomes: commands += [concat(["cat ", str.join(" ", self.i2_outputs[chromosome]), " > ", os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) ])] commands += [concat(["cat ", str.join(" ", [x + "_info" for x in self.i2_outputs[chromosome]]), " > ", os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome + "_info") ])] p = [{ "name" : "process_i2_results_begin", "command" : "Execute_commands_user_Kantale", "params" : {"commands" : commands}, "prereq" : [], "walltime" : "24:00:00", "mem" : "8GB", }] for chromosome in self.chromosomes: p += [{ "name" : "process_i2_results_chr" + chromosome, "command" : "Convert_impute2_gprobs_to_PEDMAP_beagle_user_Kantale", "params" : { "input_impute2_gprobs_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome), "input_impute2_info_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome + "_info"), "output_TPED_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) + ".tped", "output_TFAM_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) + ".tfam", "output_stats_filename" : os.path.join(self.temp_dir, "RESULTS/output_chr_" + chromosome) + ".stats", "chromosome" : chromosome, }, "prereq" : ["process_i2_results_begin"], "walltime" : "100:00:00", "mem" : "8GB", }] p += [{ "name" : "process_i2_results", "command" : "No_op_user_Kantale", "params" : {}, "prereq" : ["process_i2_results_chr" + chromosome for x in self.chromosomes], "walltime" : "00:10:00", "mem" : "1GB", }] return p def add_step(self, command): p = command() if len(self.pipeline) > 0: p[0]["prereq"] = [self.pipeline[-1]["name"]] self.pipeline += p def do_impute(self): self.pipeline = [] if self.study_type == "bed": self.add_step(self.convert_BED_PEDMAP) else: raise Exception("Unknown study type:", self.study_type) self.add_step(self.remove_rs_from_PEDMAP) self.add_step(self.convert_PEDMAP_TT) self.add_step(self.compare_TT) self.add_step(self.convert_compared_gen_sample) self.add_step(self.convert_reference_TT_PEDMAP_hap_legend) if self.software == "impute2": self.add_step(self.impute_i2) self.add_step(self.process_i2_results) else: raise Exception("Unknown software:", self.software, " valid options: impute2") Run_pipeline_user_Kantale( pipeline = self.pipeline, pbsStats = self.pbsStats, temp_dir = os.path.join(self.temp_dir, "PBS"), delay = 5, # dummy = True, verbose = True, ) def concat(params): return str.join(" ", params)
Unit Tests
def uni1(): return True
Development Code
def Impute_user_Kantale(): pass
Permissions
Documentation Permissions
Kantale
Code Permissions
Kantale
Unit Tests Permissions
Kantale