% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Estimate.R
\name{estimateSpike}
\alias{estimateSpike}
\title{Estimate simulation parameters for spike-ins}
\usage{
estimateSpike(spikeData,
spikeInfo,
MeanFragLengths = NULL,
batchData = NULL,
Normalisation=c('depth','none'),
SampleFilter = 3,
RNAseq = c("bulk", "singlecell"),
Protocol = c('UMI', 'Read'),
verbose = TRUE)
}
\arguments{
\item{spikeData}{is a count \code{matrix}. Rows correspond to spike-ins, columns to samples.
\code{rownames} should contain the spike-in names, \code{colnames} the sample names.}

\item{spikeInfo}{is a molecule count \code{matrix} of spike-ins. Rows correspond to spike-ins.
The order of rows should be the same as the rows in \code{spikeData}.
The rownames should be the same as the rownames of spikeData.
The first column must contain the molecule counts of spike-ins and be named 'SpikeInput'.
The second column can contain the sequence lengths of spike-ins in bases and be named 'Lengths'.}

\item{MeanFragLengths}{is a numeric vector of the mean fragment length.}

\item{batchData}{is a \code{data.frame} for batch annotation. Rows correspond to samples.
The order of rows should be the same as the columns in \code{spikeData}.
The rownames should be the same as the column names of spikeData.
The first column should contain the batch annotation, e.g. 'a' for batch 1, 'b' for batch 2.}

\item{Normalisation}{is a character value: 'depth' or 'none'. For more information, please consult the details section.}

\item{SampleFilter}{is a numeric vector indicating the minimal number of MADs (median absolute deviation)
away from the median number of features detected as well as sequencing depth across all samples
so that outlying samples are removed prior to normalisation and parameter estimation.
The default is \code{5}, i.e. at least 5 MADs away from the median.
Choose higher values if you want to filter out less samples.
This parameter is particularly important for single cells to ensure reliable parameter estimation.
For more information, please consult \code{\link[scater]{isOutlier}}.}

\item{RNAseq}{is a character value: "bulk" or "singlecell".}

\item{Protocol}{is a character value defining the type of counts given in \code{countData}.
Options are "UMI" (e.g. 10X Genomics, CEL-seq2) or "Read" (e.g. Smart-seq2).}

\item{verbose}{Logical value to indicate whether to print function information.
Default is \code{TRUE}.}
}
\value{
List object with the following entries:
\item{normCounts}{The normalised spike-in read counts \code{data.frame}.}
\item{normParams}{The mean and standard deviation per spike-in using normalised read counts in a \code{data.frame}.}
\item{CaptureEfficiency}{The ad-hoc estimated as well as fitted detection probabilities with confidence interval per spike-in using normalised read counts in a \code{data.frame}.}
\item{seqDepth}{Library size, i.e. total number of reads per library}
\item{sf}{The estimated library size factors.}
\item{EVGammaThetaEstimates}{Estimation of the four parameters capturing technical variability,
namely E[\eqn{\gamma}], Var[\eqn{\gamma}], E[\eqn{\theta}] and Var[\eqn{\theta}].
For more details, please consult supplementary information of Kim et al. 2016 (DOI: 10.1038/ncomms9687).
These estimates are needed for simulating spike-in read counts.}
\item{Input}{The input spike-in expression \code{matrix}, molecule counts \code{data.frame} and batch annotation \code{data.frame},
filtered so that only spike-ins with nonzero expression and samples with at least 100 reads are retained.}
\item{Settings}{Reporting the chosen normalisation framework.}
}
\description{
This function estimates and returns parameters needed for spike-in count simulations using supplementary code from Kim et al. 2016 (DOI: 10.1038/ncomms9687).
}
\details{
Normalisation methods
\describe{
\item{'depth'}{applies the depth normalization method as implemented in \code{\link[scuttle]{computeSpikeFactors}}.}
\item{'none'}{No normalisation is applied. This approach can be used for prenormalized expression estimates, e.g. TPM/FPKM/RPKM estimated by RSEM, salmon, cufflinks etc.}
}
}
\examples{
\dontrun{
data("SmartSeq2_SpikeIns_Read_Counts")
data("SmartSeq2_SpikeInfo")
Batches = data.frame(Batch = sapply(strsplit(colnames(SmartSeq2_SpikeIns_Read_Counts),
                                    "_"), "[[", 1),
                     stringsAsFactors = F,
                     row.names = colnames(SmartSeq2_SpikeIns_Read_Counts))
# estimation
spike_param <- estimateSpike(spikeData = SmartSeq2_SpikeIns_Read_Counts,
spikeInfo = SmartSeq2_SpikeInfo,
MeanFragLength = NULL,
batchData = Batches,
Normalisation = 'depth')
# plotting
plotSpike(estSpike = spike_param, Annot = FALSE)
}
}
\author{
Beate Vieth
}
