Подготовка выровненных ридов к определения вариантов

Материал из Институт биоинформатики
Перейти к: навигация, поиск

Маркировка дупликатов

#!/bin/bash

# Marking duplicate reads in BAM files

mkdir picard_logs

for BAM in *.sorted.bam
do
    while [ $(jobs | wc -l) -ge 12 ] ; do sleep 1; done
    echo Dedupping $BAM; java -Xmx2g -jar /Molly/barbitoff/software/picard-tools-2.0.1/picard.jar MarkDuplicates I=$BAM O=${BAM%%.sorted.bam}.dedup.bam M=picard_logs/${BAM%%.sorted.bam}.metrics &> picard_logs/${BAM%%.sorted.bam}.log &
done
wait

echo 'Finished marking duplicates, ready for calculating HSMetrics!'

Сбор метрик для анализа обогащения

#!/bin/bash

# Merging BAMs for each sample and piping into HsMetrics

PLATFORM=$1
mkdir ../../hs_metrics

for BAM in *.lane_1.dedup.bam
do
    while [ $(jobs | wc -l) -ge 8 ] ; do sleep 1; done
    samtools merge ${BAM%%.lane*}.merged.bam $BAM ${BAM%%.lane*}.lane_2.dedup.bam &
done
wait

for MGB in *.merged.bam
do
    while [ $(jobs | wc -l) -ge 8 ] ; do sleep 1; done
    java -Xmx2g -jar /Molly/barbitoff/software/picard-tools-2.0.1/picard.jar CalculateHsMetrics I=$MGB O=../../hs_metrics/${MGB%%.merg*}.metrics BAIT_INTERVALS=/Molly/barbitoff/reference/${PLATFORM}.intervals TARGET_INTERVALS=/Molly/barbitoff/reference/${PLATFORM}.intervals NEAR_DISTANCE=0 LEVEL=SAMPLE &
done
wait

rm *.merged.bam

cd ../../hs_metrics
echo SAMPLE > sample

for METRIC in *.metrics
do
    TAG=${METRIC%%.metrics}
    awk '{ if (NR == 8) print }' $METRIC > tmp
    echo $TAG > tmp2
    paste tmp2 tmp  >> data
    rm tm*
done

awk '{   if (NR == 7) print }' $METRIC > tmpheader
paste sample tmpheader > header
cat header data > ${METRIC%%_sample*}.metrics
rm data header tmpheader sample

echo 'Done with HSMetrics'

Перевыравнивание около участков инделов

#!/bin/bash

# Indel realignment using GATK

PLATFORM=$1
# illumina or roche

# Index directory of deduplicated bam files
# Indexing is required for GATK to work properly

for BAM in *.dedup.bam
do
    /Molly/barbitoff/software/samtools-1.3/samtools index $BAM &
done
wait

echo 'Indexing done, ready to use GATK!'

mkdir gatk_logs

for DDB in *.dedup.bam
do
    while [ $(jobs | wc -l) -ge 6 ] ; do sleep 1; done
    java -Xmx2g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T RealignerTargetCreator -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $DDB -L /Molly/barbitoff/reference/${PLATFORM}.intervals -known /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -o ${DDB%%.dedup.bam}.target.intervals &> ./gatk_logs/${DDB%%.dedup.bam}.TargetCreator.log &
done
wait

echo 'Intervals for realignment created!'

for DDB in *.dedup.bam
do
    while [ $(jobs | wc -l) -ge 6 ] ; do sleep 1; done
    echo 'Realigning' ${DDB%%.dedup.bam}; java -Xmx2g -jar /Molly/barbitoff/software/gatk/GenomeAnalysisTK.jar -T IndelRealigner -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $DDB -targetIntervals ${DDB%%.dedup.bam}.target.intervals -L /Molly/barbitoff/reference/${PLATFORM}.intervals -known /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -o ${DDB%%.dedup.bam}.realigned.bam &> ./gatk_logs/${DDB%%.dedup.bam}.IndelRealigner.log &
done
wait

mkdir ./gatk_logs/realigner_intervals
mv *.intervals ./gatk_logs/realigner_intervals

mkdir dedupped
mv *.dedup.* dedupped

echo 'Finished realigning indel segments, BAMs ready for BQSR!'

Перекалибровка качества оснований

#!/bin/bash

# Run BQSR for WES data
# Note that the platform must be specified to correct only using the target regions

PLATFORM=$1
# illumina or roche

for BAM in *.realigned.bam
do
    java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T BaseRecalibrator -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $BAM -knownSites /Molly/barbitoff/gatk-bundle-b37/dbsnp_138.b37.vcf -knownSites /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -L /Molly/barbitoff/reference/${PLATFORM}.intervals -o ${BAM%%.realigned.bam}.recal.table > ./gatk_logs/${BAM%%.realigned.bam}.BaseRecalibrator.log
done

for BAM in *.realigned.bam
do
    java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T PrintReads -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $BAM -BQSR ${BAM%%.realigned.bam}.recal.table -o ${BAM%%.realigned.bam}.recal.bam > ./gatk_logs/${BAM%%.realigned.bam}.PrintReads.log
done


echo 'Recalibration of base qualities done, BAMs are ready for calling!'

Анализ ковариат

#!/bin/bash

# Analyzing covariates for estimating BQSR effect from data

PLATFORM=$1

for BAM in *.realigned.bam
do
    java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T BaseRecalibrator -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $BAM -knownSites /Molly/barbitoff/gatk-bundle-b37/dbsnp_138.b37.vcf -knownSites /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -L /Molly/barbitoff/reference/${PLATFORM}.intervals -BQSR ${BAM%%.realigned.bam}.recal.table -o ${BAM%%.realigned.bam}.recal.after.table > ./gatk_logs/${BAM%%.realigned.bam}.SPBaseRecalibrator.log
done

mkdir ../bqsr_pdfs

for BAM in *.recal.bam
do
    java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T AnalyzeCovariates -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -before ${BAM%%.bam}.table -after ${BAM%%.bam}.after.table -plots ../bqsr_pdfs/${BAM%%.bam}.pdf
done

mkdir gatk_logs/recal_tables

mv *.table gatk_logs/recal_tables/

echo 'Analysis of covariates done!'