Convert tab file to VCF user Kantale
From PyPedia
Contents |
[edit] Documentation
Converts a tabular file into VCF format (http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-40). The dictionary tab_format contains the column indexes and potential lambda function for column processing.
Example:
For the file:
chr10 83945 G R 99 G 35 50 73 A 32 28 36 109 0.0418952 1.55963 1 73 chr10 84018 T Y 47 T 35 33 36 C 36 5 5 41 0.725034 1.12195 1 73 chr10 84545 C Y 99 C 35 16 16 T 33 15 15 31 0.730608 1.00000 1 527 chr10 272897 C T 85 T 35 22 22 C 0 0 0 22 1.00000 1.00000 1 2303 chr10 282927 C A 83 A 36 23 23 C 0 0 0 23 1.00000 1.00000 1 1218
Use the following code
import pypedia from pypedia import Convert_tab_file_to_VCF tab_format = {"chromosome" : 0, "position" :1, "rs":None, "reference" : 2, "chr1": 5, "chr2" : 9} return Convert_tab_file_to_VCF_user_Kantale("INPUT_TAB_FILE", tab_format)
if the tab_format is omitted then the following format is assumed:
Chromosome position rs_code reference chr1 chr2
In that case, an example input could be:
10 83945 rs1 G A A 10 83361 rs2 G G A 10 89253 rs3 G G G
This method generates a vcf file withe a single sample.
[edit] Parameters
<inputs> <param name="input_filename" value="" type="data" label="Input tab separated file "/> <param name="output_filename" value="" type="data" label="Output VCF filename "/> </inputs>
[edit] Return
The output filename
[edit] See also
[edit] Code
import os def Convert_tab_file_to_VCF_user_Kantale( input_filename=None, tab_format=None, output_filename=None ): if not tab_format: tab_format = {"chromosome" : 0, "position" :1, "rs": 2, "reference" : 3, "chr1": 4, "chr2" : 5} header, header_str = Default_VCF_header_user_Kantale(None, [input_filename]) file = open(input_filename) output = open(output_filename, "w") output.write(header_str) currentLine = 0 while True: line = file.readline() if not line: break currentLine += 1 if currentLine % 10000 == 0: print "Lines:", currentLine, Print_now_user_Kantale() toPrint = [] if line.strip() == "": continue lineSplitted = line.replace("\n", "").split() toPrint += [lineSplitted[tab_format["chromosome"]].replace("chr", "")] #chromosome toPrint += [lineSplitted[tab_format["position"]]] #position if not tab_format.has_key("rs"): toPrint += ["."] else: toPrint += [lineSplitted[tab_format["rs"]]] #ID if tab_format.has_key("reference"): ref = lineSplitted[tab_format["reference"]] else: ref = "A" toPrint += [ref] # reference if tab_format.has_key("chr1"): alt1 = lineSplitted[tab_format["chr1"]] else: alt1 = "C" if tab_format.has_key("chr2"): alt2 = lineSplitted[tab_format["chr2"]] else: alt2 = "C" alternativeToPrint = [] if alt1 != ref: alternativeToPrint += [alt1] #Alternative if alt2 not in [alt1, ref]: alternativeToPrint += [alt2] #Alternative if len(alternativeToPrint) == 0: toPrint += ['.'] else: toPrint += [str.join(",", alternativeToPrint)] #Alternative toPrint += ["100", "Q20", "NS=1", "GT"] genotype = ("0" if alt1 == ref else "1") + "/" + ("0" if alt2 == ref else "1") toPrint += [genotype] # output.write( str.join("\t", toPrint) + "\n") file.close() output.close() return output_filename
[edit] Unit Tests
def uni1(): return True
[edit] Development Code
def Convert_tab_file_to_VCF_user_Kantale(): pass
[edit] Permissions
[edit] Documentation Permissions
Kantale
[edit] Code Permissions
Kantale
[edit] Unit Tests Permissions
Kantale