Assembly impute2 gprobs bins user Kantale
From PyPedia
Contents |
[edit] Documentation
Assembly gprobs files from multiple imputation pipelines from several bins into one file. This method is designed to assembly gprobs files created by the Imputation_pipeline_user_Kantale.
[edit] Parameters
<inputs> </inputs>
[edit] Return
[edit] See also
Imputation_pipeline_user_Kantale
[edit] Code
def Assembly_impute2_gprobs_bins_user_Kantale( bins_filenames = None, bin_sizes = None, #Array of size of each bin tfam_filename = None, gprobs_filenames = None, impute2_info_filenames = None, output_filename = None, output_info_filename = None, ): #Read tfam if tfam_filename: tfam = open(tfam_filename).readlines() #Read bins print "Reading indexed.." indexes = [] bin_sizes = [0] * len(bins_filenames) for bin_index, bin_filename in enumerate(bins_filenames): bin_file = open(bin_filename) bin_file_lines = bin_file.readlines() bin_sizes[bin_index] = len(bin_file_lines) for bin_file_line in bin_file_lines: indexes += [tfam.index(bin_file_line)] bin_file.close() total_samples = sum(bin_sizes) print "..done reading indexes" else: total_samples = sum(bin_sizes) indexes = range(total_samples) #Open output file output_file = open(output_filename, "w") #Open output info filename if output_info_filename: output_info_file = open(output_info_filename, "w") #Open all grobs files gprobs_files = [open(filename) for filename in gprobs_filenames] #Open impute2 info files if impute2_info_filenames: impute2_info_files = [open(filename) for filename in impute2_info_filenames] lines = 0 snp_found_problems = 0 snp_bin_problems = 0 goto_next_line = [True] * len(gprobs_files) gprobs_files_lines = [None] * len(gprobs_files) impute2_info_files_lines = [None] * len(gprobs_files) while True: #Read lines from all files in goto_next_line for index in range(len(gprobs_files)): if goto_next_line[index]: gprobs_files_lines[index] = gprobs_files[index].readline() if impute2_info_filenames: impute2_info_files_lines[index] = impute2_info_files[index].readline() goto_next_line = [True] * len(gprobs_files) if all(not x for x in gprobs_files_lines): break gprobs_files_ls = [line.replace("\n", "").split() for line in gprobs_files_lines] if impute2_info_filenames: info_files_ls = [line.replace("\n", "").split() for line in impute2_info_files_lines] lines += 1 if lines % 1000 == 0: print "Lines: ", lines, Print_now_user_Kantale() #Take the first 4 columns signatures = [ls[0:5] for ls in gprobs_files_ls] #All these should be the same if len(set([str(x) for x in signatures])) > 1: #If not.. #Check panels sig_1 = [ls[1:5] for ls in gprobs_files_ls] if len(set([str(x) for x in sig_1])) == 1: print "Warning: A SNP is found in original study, only for part of the bins: " + str(signatures) snp_found_problems += 1 continue #Check positions sig_2 = [int(x[2]) for x in signatures] if len(set(sig_2)) > 1: print "Warning: A SNP is not present in all bins: " + str(signatures) snp_bin_problems += 1 #Find the lower and move to the next line minimum_pos = min(sig_2) goto_next_line = [x == minimum_pos for x in sig_2] continue #Unknown error.. raise Exception("Unknown error in SNP signatures:", str(signatures)) composite_line = reduce(lambda x,y: x+ y, [x[5:] for x in gprobs_files_ls]) to_print = signatures[0] for index in indexes: to_print += composite_line[(index*3):(index*3)+3] output_file.write(str.join(" ", to_print) + "\n") if output_info_filename: common_r2 = sum([(bin_sizes[info_index] * float(x[5])) / total_samples for info_index, x in enumerate(info_files_ls)]) output_info_file.write(str.join(" ", info_files_ls[0][0:5] + [str(common_r2)] + info_files_ls[0][6:] ) + "\n") #Close all gprobs files [file.close() for file in gprobs_files] #Close output file output_file.close() if output_info_filename: output_info_file.close() print "SNPs found in study for part of the bins:", snp_found_problems print "SNPs not found in all the bins:", snp_bin_problems
[edit] Unit Tests
def uni1(): return True
[edit] Development Code
def Assembly_impute2_gprobs_bins_user_Kantale(): pass
[edit] Permissions
[edit] Documentation Permissions
Kantale
[edit] Code Permissions
Kantale
[edit] Unit Tests Permissions
Kantale