Create random subset of beagle user Kantale
From PyPedia
Contents |
[edit] Documentation
Divides a beagle file into two datasets: a subset that contains the requested percentage of the markers, and a "rest" dataset that contains the remaining markers.
[edit] Parameters
<inputs> </inputs>
[edit] Return
[edit] See also
Create random subset of Impute2
[edit] Code
import random


def _is_marker_line(line):
    """Return True when *line* is a beagle marker record (first field "M")."""
    fields = line.split()
    # Blank lines have no fields; treat them as non-marker lines instead of
    # failing on fields[0].
    return bool(fields) and fields[0] == "M"


def _read_subset_ids(filename):
    """Read one integer per line from *filename*, skipping blank lines."""
    with open(filename) as subset_file:
        return [int(line) for line in subset_file if line.strip()]


def Create_random_subset_of_beagle_user_Kantale(
    input_beagle_filename=None,
    input_markers_filename=None,
    percentage=None,
    output_beagle_rest_filename=None,
    output_markers_rest_filename=None,
    output_beagle_subset_filename=None,
    output_markers_subset_filename=None,
    input_subset_filename=None,
    output_subset_filename=None,
    input_subset_field=None,  # Either "line", "position"
):
    """Split a beagle file (and its markers file) into a subset and the rest.

    Every non-marker line of the beagle file (first field other than "M") is
    copied to both beagle outputs. Each marker line, together with the
    corresponding line of the markers file, goes either to the "subset"
    outputs or to the "rest" outputs.

    Parameters:
        input_beagle_filename: beagle file to split.
        input_markers_filename: markers file; its lines are consumed in step
            with the marker lines of the beagle file, and the second
            whitespace-separated column is parsed as an integer position.
        percentage: fraction of the markers to draw at random (the subset
            size is ``int(percentage * number_of_markers)``). Only used when
            ``input_subset_filename`` is not given.
        output_beagle_rest_filename, output_markers_rest_filename:
            destinations for the markers that are NOT selected.
        output_beagle_subset_filename, output_markers_subset_filename:
            destinations for the selected markers.
        input_subset_filename: optional file with one integer per line
            naming the selected markers; replaces the random draw.
        output_subset_filename: optional file that receives the selected
            integers, one per line.
        input_subset_field: "line" to match the selected integers against
            the 0-based marker index, "position" to match them against the
            position column of the markers file. When the subset is drawn
            at random and this is None, "line" is used, because the random
            draw produces marker indices.

    Raises:
        Exception: if a marker line is reached and ``input_subset_field`` is
            neither "line" nor "position".
    """
    # The input is opened first so that a missing input file fails before any
    # output file is created or truncated.
    with open(input_beagle_filename) as beagle_in, \
            open(output_beagle_rest_filename, "w") as beagle_rest, \
            open(output_markers_rest_filename, "w") as markers_rest, \
            open(output_beagle_subset_filename, "w") as beagle_subset, \
            open(output_markers_subset_filename, "w") as markers_subset:

        # Pass 1: copy header lines to both outputs and count the markers.
        markers = 0
        for line in beagle_in:
            if _is_marker_line(line):
                markers += 1
            else:
                beagle_rest.write(line)
                beagle_subset.write(line)

        if input_subset_filename:
            subset_lines = _read_subset_ids(input_subset_filename)
        else:
            subset_lines_n = int(percentage * float(markers))
            # list(...) is required: random.shuffle needs a mutable sequence,
            # and range() is not one on Python 3.
            lines = list(range(markers))
            random.shuffle(lines)
            subset_lines = lines[:subset_lines_n]
            if input_subset_field is None:
                # Randomly drawn values are marker indices.
                input_subset_field = "line"

        if output_subset_filename:
            with open(output_subset_filename, "w") as output_subset_file:
                output_subset_file.writelines(
                    str(x) + "\n" for x in subset_lines
                )

        # Set membership keeps the per-marker test O(1).
        selected = set(subset_lines)

        # Pass 2: route every marker line to the subset or the rest.
        beagle_in.seek(0)
        with open(input_markers_filename) as markers_in:
            current_marker = 0
            for line in beagle_in:
                if not _is_marker_line(line):
                    continue

                line_markers = markers_in.readline()
                # Parsed for every marker, as before, so a markers file that
                # is shorter than the beagle file fails here.
                current_position = int(line_markers.split()[1])

                if input_subset_field == "line":
                    to_check = current_marker
                elif input_subset_field == "position":
                    to_check = current_position
                else:
                    raise Exception(
                        "Unknown value for parameter input_subset_field: "
                        + str(input_subset_field)
                    )

                if to_check in selected:
                    beagle_subset.write(line)
                    markers_subset.write(line_markers)
                else:
                    beagle_rest.write(line)
                    markers_rest.write(line_markers)

                current_marker += 1
[edit] Unit Tests
def uni1():
    """Placeholder unit test: performs no checks and always returns True."""
    return True
[edit] Development Code
def Create_random_subset_of_beagle_user_Kantale():
    """Development stub: takes no arguments, does nothing, returns None."""
[edit] Permissions
[edit] Documentation Permissions
Kantale
[edit] Code Permissions
Kantale
[edit] Unit Tests Permissions
Kantale