Pairwise linkage disequilibrium
From PyPedia
Contents |
[edit] Documentation
Computes the linkage disequilibrium between two SNPs. Computes the r-squared, D' and the estimated haplotype frequencies. It has been developed to produce the same values as plink with --ld option. (http://pngu.mgh.harvard.edu/~purcell/plink/ld.shtml)
Parameters:
- SNP_1 : A tuple with the alleles of the first SNP. For example (("A","A"), ("A","G"), ("G","G"))
- SNP_2 : A tuple with the alleles of the secons SNP. for example (("A","T"), ("A","A"), ("A","A"))
- See also: http://rannala.org/books/CUPChap3.pdf
[edit] Parameters
<inputs> <param name="SNP_1" type="eval" value="[('A','A'), ('A','G'), ('G','G'), ('G','A')]" label="Tuple with the first SNP: "/> <param name="SNP_2" type="eval" value="[('A','A'), ('A','G'), ('G','G'), ('A','A')]" label="Tuple with the second SNP: "/> </inputs>
[edit] Return
A dictionary with the following keys:
- "haplotypes" : A tuple with three values: (1) the estimated haplotype, (2) the haplotype frequencies and (3) frequencies expectation under linkage equilibrium.
- "R_sq" : The r-square.
- "Dprime" : The D prime.
[edit] See also
- Articles called: SNP_alleles, Minor_allele_frequency
[edit] Code
def Pairwise_linkage_disequilibrium( SNP_1 = None, SNP_2 = None, ): """ >>> print Pairwise_linkage_disequilibrium([('A','A'), ('A','G'), ('G','G'), ('G','A')], [('A','A'), ('A','G'), ('G','G'), ('A','A')]) {'haplotypes': [('AA', 0.499999999973935, 0.3125), ('AG', 2.606498430642265e-11, 0.1875), ('GA', 0.12500000002606498, 0.3125), ('GG', 0.374999999973935, 0.1875)], 'R_sq': 0.5999999998331841, 'Dprime': 0.9999999998609868} """ ret = {} (SNP_1_allele_1, SNP_1_allele_2) = SNP_alleles(SNP_1) (SNP_2_allele_1, SNP_2_allele_2) = SNP_alleles(SNP_2) Haplotype_1_1 = [x[0] for x in SNP_1] Haplotype_1_2 = [x[1] for x in SNP_1] Haplotype_2_1 = [x[0] for x in SNP_2] Haplotype_2_2 = [x[1] for x in SNP_2] MAF_1 = Minor_allele_frequency(SNP_1_allele_1, SNP_1_allele_2, Haplotype_1_1, Haplotype_1_2) MAF_2 = Minor_allele_frequency(SNP_2_allele_1, SNP_2_allele_2, Haplotype_2_1, Haplotype_2_2) if MAF_1[1] == SNP_1_allele_1: freq_1_1 = MAF_1[0] freq_1_2 = 1.0 - freq_1_1 elif MAF_1[1] == SNP_1_allele_2: freq_1_2 = MAF_1[0] freq_1_1 = 1.0 - freq_1_2 if MAF_2[1] == SNP_2_allele_1: freq_2_1 = MAF_2[0] freq_2_2 = 1.0 - freq_2_1 elif MAF_2[1] == SNP_2_allele_2: freq_2_2 = MAF_2[0] freq_2_1 = 1.0 - freq_2_2 freq_A_B = freq_1_1 * freq_2_1 freq_A_b = freq_1_1 * freq_2_2 freq_a_B = freq_1_2 * freq_2_1 freq_a_b = freq_1_2 * freq_2_2 Haps = zip(Haplotype_1_1, Haplotype_1_2, Haplotype_2_1, Haplotype_2_2) N11 = len([1 for h11, h12, h21, h22 in Haps if h11 == SNP_1_allele_1 and h12 == SNP_1_allele_1 and h21 == SNP_2_allele_1 and h22 == SNP_2_allele_1]) N21 = len([1 for h11, h12, h21, h22 in Haps if ((h11 == SNP_1_allele_1 and h12 == SNP_1_allele_2) or (h11 == SNP_1_allele_2 and h12 == SNP_1_allele_1)) and h21 == SNP_2_allele_1 and h22 == SNP_2_allele_1]) N31 = len([1 for h11, h12, h21, h22 in Haps if h11 == SNP_1_allele_2 and h12 == SNP_1_allele_2 and h21 == SNP_2_allele_1 and h22 == SNP_2_allele_1]) N12 = len([1 for h11, h12, h21, h22 in Haps if h11 == SNP_1_allele_1 and h12 == SNP_1_allele_1 and ((h21 == SNP_2_allele_1 and h22 == SNP_2_allele_2) or (h21 == SNP_2_allele_2 and h22 == SNP_2_allele_1))]) N22 = len([1 for h11, h12, h21, h22 in Haps if ((h11 == SNP_1_allele_1 and h12 == SNP_1_allele_2) or (h11 == SNP_1_allele_2 and h12 == SNP_1_allele_1)) and ((h21 == SNP_2_allele_1 and h22 == SNP_2_allele_2) or (h21 == SNP_2_allele_2 and h22 == SNP_2_allele_1))]) N32 = len([1 for h11, h12, h21, h22 in Haps if h11 == SNP_1_allele_2 and h12 == SNP_1_allele_2 and ((h21 == SNP_2_allele_1 and h22 == SNP_2_allele_2) or (h21 == SNP_2_allele_2 and h22 == SNP_2_allele_1))]) N13 = len([1 for h11, h12, h21, h22 in Haps if h11 == SNP_1_allele_1 and h12 == SNP_1_allele_1 and h21 == SNP_2_allele_2 and h22 == SNP_2_allele_2]) N23 = len([1 for h11, h12, h21, h22 in Haps if ((h11 == SNP_1_allele_1 and h12 == SNP_1_allele_2) or (h11 == SNP_1_allele_2 and h12 == SNP_1_allele_1)) and h21 == SNP_2_allele_2 and h22 == SNP_2_allele_2]) N33 = len([1 for h11, h12, h21, h22 in Haps if h11 == SNP_1_allele_2 and h12 == SNP_1_allele_2 and h21 == SNP_2_allele_2 and h22 == SNP_2_allele_2]) N1_ = N11 + N12 + N13 N2_ = N21 + N22 + N23 N3_ = N31 + N32 + N33 N_1 = N11 + N21 + N31 N_2 = N12 + N22 + N32 N_3 = N13 + N23 + N33 N = N1_ + N2_ + N3_ pAB = 0.25 pab = 0.25 pAb = 0.25 paB = 0.25 for nn in xrange(10): nAB = (2*N11) + N12 + N21 + (( (pAB*pab) / ((pAb*paB) + (pAB*pab)) ) * N22) nab = (2*N33) + N23 + N32 + (( (pAB*pab) / ((pAb*paB) + (pAB*pab)) ) * N22) nAb = (2*N13) + N12 + N23 + (( (pAb*paB) / ((pAb*paB) + (pAB*pab)) ) * N22) naB = (2*N31) + N21 + N32 + (( (pAb*paB) / ((pAb*paB) + (pAB*pab)) ) * N22) pAB = nAB / (2.0 * N) pab = nab / (2.0 * N) pAb = nAb / (2.0 * N) paB = naB / (2.0 * N) ret["haplotypes"] = [ (SNP_1_allele_1 + SNP_2_allele_1, pAB, freq_A_B), (SNP_1_allele_1 + SNP_2_allele_2, pAb, freq_A_b), (SNP_1_allele_2 + SNP_2_allele_1, paB, freq_a_B), (SNP_1_allele_2 + SNP_2_allele_2, pab, freq_a_b), ] Observed = [pAB, pAb, paB, pab] Expected = [freq_A_B, freq_A_b, freq_a_B, freq_a_b] R_sq = sum([((o-e)*(o-e))/e for o,e in zip(Observed, Expected)]) ret["R_sq"] = R_sq pA = pAB + pAb pB = pAB + paB D = pAB - (pA*pB) if D>=0: Dmax = min(pA * (1-pB), (1-pA)*pB) else: Dmax = min(pA * pB, (1-pA)*(1-pB)) Dprime = D / Dmax ret["Dprime"] = Dprime return ret
[edit] Unit Tests
def uni1(): import doctest doctest.run_docstring_examples(Pairwise_linkage_disequilibrium, globals())
[edit] Development Code
def Pairwise_linkage_disequilibrium(): pass
[edit] Permissions
[edit] Documentation Permissions
Kantale
[edit] Code Permissions
Kantale
[edit] Unit Tests Permissions
Kantale