8000 GitHub - kevin-furant/runExomeDepth
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

kevin-furant/runExomeDepth

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

61 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

ExomeDepth for WES CNV calling

simple run

Rscript run.R bam.list bai.list

speed up

  1. create Bins
  • Input:
    ref as hg19 reference file
  • Output:
    exons.hg19.A.target.rds
    exons.hg19.A.rdata.rds exons.hg19.X.target.rds
    exons.hg19.X.rdata.rds
  • Script:
library(ExomeDepth)
data(exons.hg19)
data(exons.hg19.X)
source("getBins.R")
ref='hg19.fa'
getBins(
    bed.frame = exons.hg19,
    include.chr = T,
    referenceFasta = ref,
    prefix="exons.hg19.A"
    )
getBins(
    bed.frame = exons.hg19.X,
    include.chr = T,
    referenceFasta = ref,
    prefix="exons.hg19.X"
    )

or

Rscript init.R hg19.fa
  1. run getBamCounts parallel for each sample
  • Input:
    sampleID as prefix
    bam as bam file path
  • Output:
    SampleID.my.count.rds
  • Script:
args=commandArgs(T)
sampleID=args[1]
bam=args[2]
library(ExomeDepth)
source("getBamCount.R")
target <- readRDS("exons.hg19.target.rds")
my.count <- getBamCount(
    target = target,
    bam = bam
    )
saveRDS(my.count,file=paste0(sampleID,".my.count.rds"))
  1. create main matrix of read count data of all sample
  • Input:
    sample.list as sampleID list
  • Output:
    all.my.counts.rds as all count data in a matrix
  • Script:
args=commandArgs(T)
sample.list=args[1]
#sample.list="sample.list"
samples=read.table(sample.list,stringsAsFactors=F)[,1]
library(ExomeDepth)
my.bins <- readRDS('exons.hg19.rdata.rds')
my.counts.matrix <- matrix(ncol=length(samples),nrow=nrow(my.bins))
colnames(my.counts.matrix) <- samples
for(i in 1:length(samples)){
    my.counts.matrix[,i] <- readRDS(paste0(samples[i],".my.count.rds"))
}
saveRDS(my.counts.matrix,file=paste0('all',".my.counts.rds"))
  1. run CallCNVs for each sample
  • Input:
    sampleID as selected sample to call CNVs
  • Output:
    sampleID.CNV.calls.tsv as CNV calls result sampleID.all.exons.rds as rds data of CNV calls
  • Script:
args=commandArgs(T)
sampleID=args[1]
library(ExomeDepth)
data(exons.hg19)
data(Conrad.hg19)
exons.hg19.GRanges <- GenomicRanges::GRanges(
    seqnames = exons.hg19$chromosome,
    IRanges::IRanges(start=exons.hg19$start,end=exons.hg19$end),
    names = exons.hg19$name
    )
my.counts.matrix <- readRDS(file=paste0('all',".my.counts.rds"))
samples <- colnames(my.counts.matrix)
for(i in 1:length(samples)){
    if(sampleID!=samples[i]){
        next
    }
    my.choice <- select.reference.set(
        test.counts = my.counts.matrix[,i],
        reference.counts = my.counts.matrix[,-i],
        bin.length = (exons.hg19$end - exons.hg19$start)/1000,
        n.bins.reduced = 10000
        )
    my.reference.selected <- apply(
        X = my.counts.matrix[,my.choice$reference.choice,drop=FALSE],
        MAR = 1,
        FUN = sum
        )
    message('Now creating the ExomeDepth object')
    all.exons <- new(
        'ExomeDepth',
        test = my.counts.matrix[,i],
        reference = my.reference.selected,
        formula = 'cbind(test, reference) ~ 1'
        )
######## Now call the CNVs
    all.exons <- CallCNVs(
        x = all.exons,
        transition.probability = 10^-4,
        chromosome = exons.hg19$chromosome,
        start = exons.hg19$start+1,
        end = exons.hg19$end,
        name = exons.hg19$name
        )
######## Now annotate the ExomeDepth object
    all.exons <- AnnotateExtra(
        x = all.exons,
        reference.annotation = Conrad.hg19.common.CNVs,
        min.overlap = 0.5,
        column.name = 'Conrad.hg19'
        )
    all.exons <- AnnotateExtra(
        x = all.exons,
        reference.annotation = exons.hg19.GRanges,
        min.overlap = 0.0001,
        column.name = 'Conrad.hg19'
        )
    output.file <- paste0(sampleID,'.CNV.calls.tsv')
    write.table(file=output.file,x=all.exons@CNV.calls,row.names=F,sep="\t",quote=F)
    saveRDS(all.exons,file=paste0(sampleID,'.all.exons.rds'))
}

example

  1. Rscript run.getBamCount.R SampleID bam.file for each sample
    input:SampleID as prefix and bam.file as bam path
    output:SampleID.my.count.rds
    time:15mins for each
  2. Rscript run.getAllCounts.R sample.list
    input:sample.list as batch list of samples
    output:all.my.counts.rds
    time:12s
  3. Rscript run.CallCNVs.R
    input:all.my.counts.rds
    output:SampleID.CNV.calls.tsv and SampleID.all.exons.rds for each sample
    time:12mins for total 12 test samples
  4. Rscript run.getCNVs.R SampleID
    input:all.my.counts.rds and selected sample SampleID
    output:SampleID.CNV.calls.tsv and SampleID.all.exons.rds for selected sample
    time:1mins

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published
0