The core functions for linkage analysis. Paramlink2 is used to do linkage analysis. The R code is bridged to python through rpy2. It run linkage analysis from batch to batch (default is 25 genes per batch).

Functions for preprocessing genotypes or phased haplotypes

  • input haps
  • output preprocessed inputs of chp markers

update_haps_ped[source]

update_haps_ped(genes)

hap2chp[source]

hap2chp(haps, gene)

input hap is [varnames,freqs,halpotypes] output is [chp_varnames,chp_freqs,chps]

generate_marker[source]

generate_marker(alleles)

array of 0,1,2. 0 if all 0 -> 2 if any 2 else 1

recombination_pos[source]

recombination_pos(hap)

recombination_region[source]

recombination_region(hap)

test

with open('../data/wg20220311/chr19test/CACHE/chr19test43.pickle', 'rb') as handle:
    genes = pickle.load(handle)
haps=genes['APOE']['predata']['1007']
hap=haps[2][:,2:]
hap
array([['2:', '1:', 'A1,2:', '1:'],
       ['2:', '2:', '1:', '2:'],
       ['1:', '1:', 'A2,1:', '1:'],
       ['2:', '2:', '1:', '2:'],
       ['2:', '1:', 'A1,2:', '1:'],
       ['?:', '?:', '?:', '?:'],
       ['2:', '1/', 'A2,1|', '1\\'],
       ['2:', '1/', 'A1,2|', '2\\'],
       ['2:', '2|', '1|', '2|'],
       ['2:', '2|', '1|', '2|'],
       ['1:', '1|', 'A2,1|', '1|'],
       ['2:', '1|', 'A1,2|', '1|'],
       ['1:', '1|', 'A2,1|', '1|'],
       ['2:', '1|', 'A1,2|', '1|'],
       ['1:', '1|', 'A2,1|', '1|'],
       ['2:', '1|', 'A1,2|', '1|'],
       ['1:', '1|', 'A2,1|', '1|'],
       ['2:', '1|', 'A1,2|', '1|']], dtype='<U7')
recombination_pos(hap)
[0, 1, 3]
recombination_region(hap)
[(0, 1), (1, 3)]
hap2chp(haps,'APOE')
[array(['APOE:0', 'APOE:1'], dtype='<U6'),
 array([0.4113    , 0.37500985]),
 array([['1007', '1007_1', '2:', '1:'],
        ['1007', '1007_1', '2:', '2:'],
        ['1007', '1007_2', '1:', '2:'],
        ['1007', '1007_2', '2:', '2:'],
        ['1007', '1007_40', '2:', '1:'],
        ['1007', '1007_40', '0:', '0:'],
        ['1007', '1007_99', '2:', '2:'],
        ['1007', '1007_99', '2:', '1:'],
        ['1007', '1007_5', '2:', '2:'],
        ['1007', '1007_5', '2:', '2:'],
        ['1007', '1007_3', '1:', '2:'],
        ['1007', '1007_3', '2:', '1:'],
        ['1007', '1007_6', '1:', '2:'],
        ['1007', '1007_6', '2:', '1:'],
        ['1007', '1007_4', '1:', '2:'],
        ['1007', '1007_4', '2:', '1:'],
        ['1007', '1007_39', '1:', '2:'],
        ['1007', '1007_39', '2:', '1:']], dtype='<U7')]
with open('../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0.pickle', 'rb') as handle:
    genes1 = pickle.load(handle)
genes1['POTEH']['predata']['10J_128:0:0']
[array(['chr22:15690532:A:G'], dtype='<U18'),
 array([0.0021]),
 array([['10J_128:0:0', '10J_128_7', '1:'],
        ['10J_128:0:0', '10J_128_7', '1:'],
        ['10J_128:0:0', '10J_128_227', '2:'],
        ['10J_128:0:0', '10J_128_227', '?:'],
        ['10J_128:0:0', '10J_128_111', '1:'],
        ['10J_128:0:0', '10J_128_111', '2:']], dtype='<U11')]
tmp=update_haps_ped(genes1)

Functions for formating the input of linkage analysis

get_allele[source]

get_allele(s)

name_haps[source]

name_haps(snps)

get_fam_hap[source]

get_fam_hap(haps, variants, vcf=None)

get_fam_geno[source]

get_fam_geno(haps, variants, vcf=None)

format_haps_bunch[source]

format_haps_bunch(dhaps, fam, vcfs=None, cutoff=None, haplotype=True)

test

Functions of heterogeneity

hlod_fun[source]

hlod_fun(Li, sign=1)

Functions for linkage analysis

calculate_ped_lod[source]

calculate_ped_lod(ped, afreq=None, rho=0, model='AD', chrom='AUTOSOMAL', penetrances=[0.01, 0.9, 0.9], dfreq=0.001)

parallel_lods[source]

parallel_lods(haps, afreqs=None, rho=0, model='AD', chrom='AUTOSOMAL', penetrances=[0.01, 0.9, 0.9], dfreq=0.001)

linkage_analysis[source]

linkage_analysis()

linkage analysis function

test

with open('../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.lods', 'rb') as handle:
    res = pickle.load(handle)
var_res=format_fam_lods(res.values(),prefix=True)
start = time.perf_counter()
var_sovs,best_sovs=[],[]
for var,res in var_res.items():
    res=res.fillna(0)
    best_sov=[var,'LOD0.5',0,0]
    for theta in res.index:
        try:
            sov = minimize_scalar(hlod_fun(list(res.loc[theta]), -1), bounds=(0,1), method='bounded', options={'xatol':1e-8})
            var_sov=[var,theta,sov.x,-sov.fun]
        except:
            var_sov=[var,theta,0,0]
        var_sovs.append(var_sov)
        if best_sov[3]<var_sov[3]: 
            best_sov=var_sov
    best_sovs.append(best_sov)
print(time.perf_counter()-start)
var_sovs=pd.DataFrame(var_sovs)
best_sovs=pd.DataFrame(best_sovs)
0.46859060414135456
def sum_variant_lods(lods):
    variants = {}
    for lod in lods:
        for m,l in zip(lod['MARKER'],lod['LOD']):
            if m in variants.keys():
                variants[m] += l
            else:
                variants[m] = l
    var_lst = []
    for var,lod in variants.items():
        snp = var[:-3]
        var_lst.append(snp.split(':')+[snp,lod])
    variants=pd.DataFrame(var_lst,columns=['CHR','POS','A0','A1','SNP','LOD'])
    variants.POS = variants.POS.astype(int)
    variants.sort_values('POS')
    return variants
fam_vcf='../data/wg20220520trimed/fam17_vcf_rmfounderwithoutvcf.pickle'
fam_path='../data/new_trim_ped_rmfounderwithoutvcf.fam'
if os.path.isfile(fam_vcf):
    with open(fam_vcf, 'rb') as handle:
        fam17_vcf = pickle.load(handle)
fam17 = pd.read_csv(fam_path,delim_whitespace=True,header=None,names=['fid','iid','fathid','mothid','sex','ad'])
fam17.index = list(fam17.iid)
fam17.ad[fam17.ad==-9]=0
fam17_d = {}
cutoff=0.0
for i in fam17.fid.unique():
    fam17_d[i] = fam17[fam17.fid==i]
gene_genotype_file='../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test0.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=False)
exist! jump ../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.input
exist! jump ../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.lods
create ../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.besthlod
gene_genotype_file='../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test3.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=False)
exist! jump ../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test3_AFcutoff0.0_linkage.input
exist! jump ../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test3_AFcutoff0.0_linkage.lods
create ../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test3_AFcutoff0.0_linkage.besthlod
gene_genotype_file='../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=True)
exist! jump ../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.input
exist! jump ../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.lods
create ../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.besthlod
gene_genotype_file='../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test1.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=True)
exist! jump ../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test1_AFcutoff0.0_linkage.input
exist! jump ../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test1_AFcutoff0.0_linkage.lods
create ../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test1_AFcutoff0.0_linkage.besthlod

Functions for summarizing results

format_fam_lods[source]

format_fam_lods(res, cutoff=0, prefix=False)

rows2one[source]

rows2one(x)

get_lods_batch[source]

get_lods_batch(path, fams=None, phase=False)

get_lods_chrom[source]

get_lods_chrom(prefix, fams=None, phase=False)

get_hlod_chrom[source]

get_hlod_chrom(prefix)

summarize_lods[source]

summarize_lods(input_lod, output_prefix, regions, fams=None, phase=False)

two output files:one is lod scores from rho 0 to 0.5. another is lod at rho=0 and max lod score combined with chr, pos and name

test

summarize_lods('../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/*_linkage.lods','../data/wg20220725raretrimmed_phase/chr22test',regions,phase=True)
/tmp/2742267.1.high_mem.q/ipykernel_32628/1320168421.py:26: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  genes.LODmax[genes.LODmax<0]=0
summarize_lods('../data/wg20220725raretrimmed/chr22test/tmp/CACHE/*_linkage.lods','../data/wg20220725raretrimmed/chr22test',regions,phase=False)
/tmp/2742267.1.high_mem.q/ipykernel_32628/1320168421.py:26: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  genes.LODmax[genes.LODmax<0]=0