Create random subset of beagle user Kantale

From PyPedia
Jump to: navigation, search

Contents

[edit] Documentation

Divides a beagle file into two datasets: a subset that contains the requested percentage of the markers, and a rest dataset that contains the remaining markers.


[edit] Parameters

<inputs>
</inputs>


[edit] Return

[edit] See also

Create random subset of Impute2

[edit] Code

import random

def Create_random_subset_of_beagle_user_Kantale(
	input_beagle_filename = None,
	input_markers_filename = None,
	percentage = None,
	output_beagle_rest_filename = None,
	output_markers_rest_filename = None,
	output_beagle_subset_filename = None,
	output_markers_subset_filename = None,
	input_subset_filename = None,
	output_subset_filename = None,
	input_subset_field = None, # Either "line" or "position"
):
	"""
	Split a beagle file (and its markers file) into a "subset" and a "rest" dataset.

	Lines of the beagle file whose first whitespace-separated field is not "M"
	are copied to BOTH beagle outputs. Every "M" (marker) line is paired with
	the next line of the markers file and the pair is routed to either the
	subset outputs or the rest outputs.

	Parameters:
	input_beagle_filename -- beagle file to split.
	input_markers_filename -- markers file; one line is consumed per "M" line
		of the beagle file, and its second column is parsed as an integer
		position.
	percentage -- fraction of the markers to select at random; it is
		multiplied directly by the marker count (so 0.1 selects 10%). Only
		used when input_subset_filename is not given.
	output_beagle_rest_filename, output_markers_rest_filename -- outputs for
		the markers that were NOT selected.
	output_beagle_subset_filename, output_markers_subset_filename -- outputs
		for the selected markers.
	input_subset_filename -- optional file with one integer per line naming
		the markers to select. When given, no random selection is done.
	output_subset_filename -- optional file to which the selected integers
		are written, one per line.
	input_subset_field -- how the selected integers are matched against a
		marker: "line" matches the 0-based index of the marker among the "M"
		lines, "position" matches the position column of the markers file.
		Random selection produces 0-based marker indices.

	Raises:
	Exception -- when a marker line is processed and input_subset_field is
		neither "line" nor "position".
	"""
	# The input is opened first so that a missing input file fails before any
	# output file is created or truncated. The with-statement guarantees that
	# every handle is closed even if an exception is raised half-way.
	with open(input_beagle_filename) as input_beagle_file, \
			open(output_beagle_rest_filename, "w") as output_beagle_rest_file, \
			open(output_markers_rest_filename, "w") as output_markers_rest_file, \
			open(output_beagle_subset_filename, "w") as output_beagle_subset_file, \
			open(output_markers_subset_filename, "w") as output_markers_subset_file:

		# First pass: copy the non-marker lines to both beagle outputs and
		# count the markers in the beagle file.
		markers = 0
		for line in input_beagle_file:
			if line.split()[0] != "M":
				output_beagle_rest_file.write(line)
				output_beagle_subset_file.write(line)
			else:
				markers += 1

		if input_subset_filename:
			with open(input_subset_filename) as input_subset_file:
				subset_lines = [int(line) for line in input_subset_file]
		else:
			subset_lines_n = int(percentage * float(markers))
			# random.shuffle needs a mutable sequence; on Python 3 range() is
			# immutable, so it has to be materialised as a list first.
			lines = list(range(markers))
			random.shuffle(lines)
			subset_lines = lines[0:subset_lines_n]

		if output_subset_filename:
			with open(output_subset_filename, "w") as output_subset_file:
				for x in subset_lines:
					output_subset_file.write(str(x) + "\n")

		# A set gives constant-time membership tests in the loop below; the
		# list is kept only for the ordered dump above.
		selected = set(subset_lines)

		# Second pass over the same beagle file: rewind instead of reopening.
		input_beagle_file.seek(0)
		with open(input_markers_filename) as input_markers_file:
			current_marker = 0
			for line in input_beagle_file:
				if line.split()[0] != "M":
					# Non-marker lines were already copied in the first pass.
					continue

				line_markers = input_markers_file.readline()
				current_position = int(line_markers.split()[1])

				if input_subset_field == "line":
					to_check = current_marker
				elif input_subset_field == "position":
					to_check = current_position
				else:
					raise Exception("Unknown value for parameter input_subset_field: " + str(input_subset_field))

				if to_check in selected:
					output_beagle_subset_file.write(line)
					output_markers_subset_file.write(line_markers)
				else:
					output_beagle_rest_file.write(line)
					output_markers_rest_file.write(line_markers)
				current_marker += 1

[edit] Unit Tests

def uni1():
	"""Placeholder unit test: takes no arguments and always returns True."""
	return True

[edit] Development Code

def Create_random_subset_of_beagle_user_Kantale():
	"""Empty development-code placeholder: takes no arguments and returns None."""
	return None

[edit] Permissions

[edit] Documentation Permissions

Kantale

[edit] Code Permissions

Kantale

[edit] Unit Tests Permissions

Kantale

[edit] Permissions Permissions

Kantale

Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox