The core functions for linkage analysis. Paramlink2 is used to do linkage analysis. The R code is bridged to python through rpy2. It run linkage analysis from batch to batch (default is 25 genes per batch).
with open('../data/wg20220311/chr19test/CACHE/chr19test43.pickle', 'rb') as handle:
genes = pickle.load(handle)
haps=genes['APOE']['predata']['1007']
hap=haps[2][:,2:]
hap
recombination_pos(hap)
recombination_region(hap)
hap2chp(haps,'APOE')
with open('../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0.pickle', 'rb') as handle:
genes1 = pickle.load(handle)
genes1['POTEH']['predata']['10J_128:0:0']
tmp=update_haps_ped(genes1)
with open('../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0_AFcutoff0.0_linkage.lods', 'rb') as handle:
res = pickle.load(handle)
var_res=format_fam_lods(res.values(),prefix=True)
start = time.perf_counter()
var_sovs,best_sovs=[],[]
for var,res in var_res.items():
res=res.fillna(0)
best_sov=[var,'LOD0.5',0,0]
for theta in res.index:
try:
sov = minimize_scalar(hlod_fun(list(res.loc[theta]), -1), bounds=(0,1), method='bounded', options={'xatol':1e-8})
var_sov=[var,theta,sov.x,-sov.fun]
except:
var_sov=[var,theta,0,0]
var_sovs.append(var_sov)
if best_sov[3]<var_sov[3]:
best_sov=var_sov
best_sovs.append(best_sov)
print(time.perf_counter()-start)
var_sovs=pd.DataFrame(var_sovs)
best_sovs=pd.DataFrame(best_sovs)
def sum_variant_lods(lods):
variants = {}
for lod in lods:
for m,l in zip(lod['MARKER'],lod['LOD']):
if m in variants.keys():
variants[m] += l
else:
variants[m] = l
var_lst = []
for var,lod in variants.items():
snp = var[:-3]
var_lst.append(snp.split(':')+[snp,lod])
variants=pd.DataFrame(var_lst,columns=['CHR','POS','A0','A1','SNP','LOD'])
variants.POS = variants.POS.astype(int)
variants.sort_values('POS')
return variants
test linkage_analysis
fam_vcf='../data/wg20220520trimed/fam17_vcf_rmfounderwithoutvcf.pickle'
fam_path='../data/new_trim_ped_rmfounderwithoutvcf.fam'
if os.path.isfile(fam_vcf):
with open(fam_vcf, 'rb') as handle:
fam17_vcf = pickle.load(handle)
fam17 = pd.read_csv(fam_path,delim_whitespace=True,header=None,names=['fid','iid','fathid','mothid','sex','ad'])
fam17.index = list(fam17.iid)
fam17.ad[fam17.ad==-9]=0
fam17_d = {}
cutoff=0.0
for i in fam17.fid.unique():
fam17_d[i] = fam17[fam17.fid==i]
gene_genotype_file='../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test0.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=False)
gene_genotype_file='../data/wg20220725raretrimmed/chr22test/tmp/CACHE/chr22test3.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=False)
gene_genotype_file='../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test0.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=True)
gene_genotype_file='../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/chr22test1.pickle'
linkage_analysis(gene_genotype_file,fam17_d,fam17_vcf,cutoff,chp=True)
summarize_lods('../data/wg20220725raretrimmed_phase/chr22test/tmp/CACHE/*_linkage.lods','../data/wg20220725raretrimmed_phase/chr22test',regions,phase=True)
summarize_lods('../data/wg20220725raretrimmed/chr22test/tmp/CACHE/*_linkage.lods','../data/wg20220725raretrimmed/chr22test',regions,phase=False)