Подготовка выровненных ридов к определения вариантов
Материал из Институт биоинформатики
Содержание
Маркировка дупликатов
#!/bin/bash # Marking duplicate reads in BAM files mkdir picard_logs for BAM in *.sorted.bam do while [ $(jobs | wc -l) -ge 12 ] ; do sleep 1; done echo Dedupping $BAM; java -Xmx2g -jar /Molly/barbitoff/software/picard-tools-2.0.1/picard.jar MarkDuplicates I=$BAM O=${BAM%%.sorted.bam}.dedup.bam M=picard_logs/${BAM%%.sorted.bam}.metrics &> picard_logs/${BAM%%.sorted.bam}.log & done wait echo 'Finished marking duplicates, ready for calculating HSMetrics!'
Сбор метрик для анализа обогащения
#!/bin/bash # Merging BAMs for each sample and piping into HsMetrics PLATFORM=$1 mkdir ../../hs_metrics for BAM in *.lane_1.dedup.bam do while [ $(jobs | wc -l) -ge 8 ] ; do sleep 1; done samtools merge ${BAM%%.lane*}.merged.bam $BAM ${BAM%%.lane*}.lane_2.dedup.bam & done wait for MGB in *.merged.bam do while [ $(jobs | wc -l) -ge 8 ] ; do sleep 1; done java -Xmx2g -jar /Molly/barbitoff/software/picard-tools-2.0.1/picard.jar CalculateHsMetrics I=$MGB O=../../hs_metrics/${MGB%%.merg*}.metrics BAIT_INTERVALS=/Molly/barbitoff/reference/${PLATFORM}.intervals TARGET_INTERVALS=/Molly/barbitoff/reference/${PLATFORM}.intervals NEAR_DISTANCE=0 LEVEL=SAMPLE & done wait rm *.merged.bam cd ../../hs_metrics echo SAMPLE > sample for METRIC in *.metrics do TAG=${METRIC%%.metrics} awk '{ if (NR == 8) print }' $METRIC > tmp echo $TAG > tmp2 paste tmp2 tmp >> data rm tm* done awk '{ if (NR == 7) print }' $METRIC > tmpheader paste sample tmpheader > header cat header data > ${METRIC%%_sample*}.metrics rm data header tmpheader sample echo 'Done with HSMetrics'
Перевыравнивание около участков инделов
#!/bin/bash # Indel realignment using GATK PLATFORM=$1 # illumina or roche # Index directory of deduplicated bam files # Indexing is required for GATK to work properly for BAM in *.dedup.bam do /Molly/barbitoff/software/samtools-1.3/samtools index $BAM & done wait echo 'Indexing done, ready to use GATK!' mkdir gatk_logs for DDB in *.dedup.bam do while [ $(jobs | wc -l) -ge 6 ] ; do sleep 1; done java -Xmx2g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T RealignerTargetCreator -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $DDB -L /Molly/barbitoff/reference/${PLATFORM}.intervals -known /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -o ${DDB%%.dedup.bam}.target.intervals &> ./gatk_logs/${DDB%%.dedup.bam}.TargetCreator.log & done wait echo 'Intervals for realignment created!' for DDB in *.dedup.bam do while [ $(jobs | wc -l) -ge 6 ] ; do sleep 1; done echo 'Realigning' ${DDB%%.dedup.bam}; java -Xmx2g -jar /Molly/barbitoff/software/gatk/GenomeAnalysisTK.jar -T IndelRealigner -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $DDB -targetIntervals ${DDB%%.dedup.bam}.target.intervals -L /Molly/barbitoff/reference/${PLATFORM}.intervals -known /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -o ${DDB%%.dedup.bam}.realigned.bam &> ./gatk_logs/${DDB%%.dedup.bam}.IndelRealigner.log & done wait mkdir ./gatk_logs/realigner_intervals mv *.intervals ./gatk_logs/realigner_intervals mkdir dedupped mv *.dedup.* dedupped echo 'Finished realigning indel segments, BAMs ready for BQSR!'
Перекалибровка качества оснований
#!/bin/bash # Run BQSR for WES data # Note that the platform must be specified to correct only using the target regions PLATFORM=$1 # illumina or roche for BAM in *.realigned.bam do java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T BaseRecalibrator -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $BAM -knownSites /Molly/barbitoff/gatk-bundle-b37/dbsnp_138.b37.vcf -knownSites /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -L /Molly/barbitoff/reference/${PLATFORM}.intervals -o ${BAM%%.realigned.bam}.recal.table > ./gatk_logs/${BAM%%.realigned.bam}.BaseRecalibrator.log done for BAM in *.realigned.bam do java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T PrintReads -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $BAM -BQSR ${BAM%%.realigned.bam}.recal.table -o ${BAM%%.realigned.bam}.recal.bam > ./gatk_logs/${BAM%%.realigned.bam}.PrintReads.log done echo 'Recalibration of base qualities done, BAMs are ready for calling!'
Анализ ковариат
#!/bin/bash # Analyzing covariates for estimating BQSR effect from data PLATFORM=$1 for BAM in *.realigned.bam do java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T BaseRecalibrator -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -I $BAM -knownSites /Molly/barbitoff/gatk-bundle-b37/dbsnp_138.b37.vcf -knownSites /Molly/barbitoff/gatk-bundle-b37/Mills_and_1000G_gold_standard.indels.b37.vcf -L /Molly/barbitoff/reference/${PLATFORM}.intervals -BQSR ${BAM%%.realigned.bam}.recal.table -o ${BAM%%.realigned.bam}.recal.after.table > ./gatk_logs/${BAM%%.realigned.bam}.SPBaseRecalibrator.log done mkdir ../bqsr_pdfs for BAM in *.recal.bam do java -Xmx20g -jar /Molly/barbitoff/software/gatk-protected/target/GenomeAnalysisTK.jar -T AnalyzeCovariates -R /Molly/barbitoff/reference/GATK_b37/human_g1k_v37.fasta -before ${BAM%%.bam}.table -after ${BAM%%.bam}.after.table -plots ../bqsr_pdfs/${BAM%%.bam}.pdf done mkdir gatk_logs/recal_tables mv *.table gatk_logs/recal_tables/ echo 'Analysis of covariates done!'