Parameters
path0='./'
path1=${path0}'op1_range/'
path2=${path0}'inform/'
path3=${path0}'cmd_list/'
path4=${path0}'temp/'
file1='GROUP'
file2='.option1'
file3='.op1'
file4='.cmd'
gene_list=${file1}'_sel_gene.list'
file0='.gene.snp0.99.maf0.05'
file00='.gene.snp.maf'
Preparation
mkdir ${path4}
while IFS='' read -r line
do
echo "cat ${path0}Exome.bed |grep -w ${line} > ${path4}Exome_gene_${line}"
echo "cat ${path0}${file1}.ann |grep -w ${line} > ${path4}${file1}_gene_${line}"
done < ${path0}${gene_list} > ${path0}'cmd_sel_gene'
parallel {} < ${path0}'cmd_sel_gene'
rm ${path0}'cmd_sel_gene'
cat ${path4}Exome_gene_* > ${path4}Exome_sel_gene.bed
cat ${path4}${file1}_gene_* > ${path4}${file1}_sel_gene.ann
sort -V -k 1 ${path4}Exome_sel_gene.bed > ${path4}Exome_sel_gene.tmp
awk -F"chr" '{print $2}' ${path4}Exome_sel_gene.tmp > ${path0}Exome_sel_genes.bed
sort -V -k 1 ${path4}${file1}_sel_gene.ann > ${path0}${file1}_sel_genes.ann
rm -r ${path4}
awk -F"chr" '{print $2" "$3}' ${path0}${file1}_sel_genes.ann|awk '{print $3"_"$5"\t"$2}' > ${path0}${file1}_sel_genes_short.ann
Replace variant name
for i in {1..22} X Y;do echo "mv ${path0}${file1}_chr${i}${file2}.bim ${path0}${file1}_chr${i}${file2}.bim.bak && awk '{print \$1\"\t\"\$1\"_\"\$2\"\t\"\$3\"\t\"\$4\"\t\"\$5\"\t\"\$6}' ${path0}${file1}_chr${i}${file2}.bim.bak > ${path0}${file1}_chr${i}${file2}.bim";done > ${path0}replace.tmp
parallel {} < ${path0}replace.tmp
rm ${path0}replace.tmp
for i in {1..22} X Y;do echo "plink --bfile ${path0}${file1}_chr${i}${file2} --extract /maha3/WYC/ADNI/Exome_sel_genes.bed --range --make-bed --out ${path1}${file1}_chr${i}${file3}.gene";done > ${path3}${file1}${file3}.gene.cmd
parallel {} < ${path3}${file1}${file3}.gene.cmd
for i in {1..22} X Y;do wc -l ${path1}${file1}_chr${i}${file3}.gene.bim ;done > ${path2}${file1}${file3}.gene.inf0
awk -F"[chr .]" '{print $7"\t"$1}' ${path2}${file1}${file3}.gene.inf0 > ${path2}${file1}${file3}.genes.inf
rm ${path2}${file1}${file3}.*.inf0
Remove missings
# Remove variants and samples which have missing genotypes with 0.10 to 0.01 frequencies.
for i in {1..22} X Y;do echo "plink --bfile ${path1}${file1}_chr${i}${file3}.gene --geno 0.99 --make-bed --out ${path1}${file1}_chr${i}${file3}.gene.snp0.99";done > ${path3}${file1}${file3}.gene.snp.cmd
parallel {} < ${path3}${file1}${file3}.gene.snp.cmd
for i in {1..22} X Y;do wc -l ${path1}${file1}_chr${i}${file3}.gene.snp0.99.bim ;done > ${path2}${file1}${file3}.gene.snp.inf0
awk -F"[chr .]" '{print $7"\t"$1}' ${path2}${file1}${file3}.gene.snp.inf0 > ${path2}${file1}${file3}.genes.snp.inf
rm ${path2}${file1}${file3}.*.inf0
Filter by maf
for i in {1..22} X Y;do echo "plink --bfile ${path1}${file1}_chr${i}${file3}.gene.snp0.99 --maf 0.05 --make-bed --out ${path1}${file1}_chr${i}${file3}.gene.snp0.99.maf0.05";done > ${path3}${file1}${file3}.gene.snp.maf.cmd
parallel {} < ${path3}${file1}${file3}.gene.snp.maf.cmd
for i in {1..22} X Y;do wc -l ${path1}${file1}_chr${i}${file3}.gene.snp0.99.maf0.05.bim ;done > ${path2}${file1}${file3}.gene.snp.maf.inf0
awk -F"[chr .]" '{print $7"\t"$1}' ${path2}${file1}${file3}.gene.snp.maf.inf0 > ${path2}${file1}${file3}.genes.snp.maf.inf
rm ${path2}${file1}${file3}.*.inf0
Merge files
for i in {1..22};do echo "${path1}${file1}_chr${i}${file3}${file0}.bed ${path1}${file1}_chr${i}${file3}${file0}.bim ${path1}${file1}_chr${i}${file3}${file0}.fam";done > ${path3}${file1}${file3}${file0}.merge.lst
plink --bfile ${path1}${file1}_chr1${file3}${file0} --merge-list ${path3}${file1}${file3}${file0}.merge.lst --make-bed --out ${path1}${file1}${file3}${file0}.merged
for i in {1..22} X Y;do wc -l ${path1}${file1}_chr${i}${file3}.gene.snp95.maf0.05.bim ;done > ${path2}${file1}${file3}.gene.snp.maf.inf0
awk -F"[chr .]" '{print $7"\t"$1}' ${path2}${file1}${file3}.gene.snp.maf.inf0 > ${path2}${file1}${file3}.genes.snp.maf.inf
rm ${path2}${file1}${file3}.*.inf0