Preliminaries
Set up the conda environment
Here we install the appropriate Python version, along with all of the
required R and Python packages.
# Create a dedicated environment with Python 3.8. The interpreter is requested
# as a package spec (python=3.8); with a leading dash conda parses it as an
# option instead of a package.
conda create -y -n ikar3.8 python=3.8
# Activate the new environment so that every install below goes into it
# rather than into whichever environment is currently active.
conda activate ikar3.8
conda install -y libgcc
conda install -y r-essentials r-base
conda install -y -c bioconda bioconductor-biocinstaller bioconductor-zellkonverter bioconductor-singlecellexperiment
conda install -y r-seurat
# ikarus itself is installed from its GitHub repository with pip.
pip install git+https://github.com/BIMSBbioinfo/ikarus.git
Load the conda environment in R
Start the R interpreter, load reticulate, and load
the conda environment.
conda_path should contain the path to the installed
conda environment.
# Attach reticulate and bind it to the conda environment created for ikarus.
library(reticulate)

# Path to the conda environment; adjust it to the local installation.
conda_path <- "~/bin/Software/miniconda3/envs/ikar3.8"
use_condaenv(condaenv = conda_path)
Data preparation
Download the ikarus model
# URL of the pre-trained ikarus core model.
trained_model_path <- "https://github.com/BIMSBbioinfo/ikarus/raw/master/tutorials/out/core_model.joblib"
# The joblib file is binary: mode = "wb" prevents download.file() from writing
# it in text mode on Windows, which would corrupt the model.
download.file(trained_model_path, "core_model.joblib", mode = "wb")
Download gene signatures
# URL of the gene signature file (GMT format) shipped with the ikarus tutorial.
signatures_path <- "https://github.com/BIMSBbioinfo/ikarus/raw/master/tutorials/out/signatures.gmt"
# Store the signatures in the working directory.
download.file(url = signatures_path, destfile = "signatures.gmt")
Run Ikarus
Download the adata
# URL of the example AnnData file (tirosh17_headneck).
adata_path <- "https://bimsbstatic.mdc-berlin.de/akalin/Ikarus/part_1/data/tirosh17_headneck/adata.h5ad"
# h5ad is a binary (HDF5) file: mode = "wb" prevents download.file() from
# writing it in text mode on Windows, which would corrupt it.
# NOTE(review): if the download is cut off, R's `timeout` option (60 s by
# default) may need to be raised before this call.
download.file(adata_path, "tirosh_adata.h5ad", mode = "wb")
Read the h5ad into an anndata object
# Import the Python anndata module through reticulate.
anndata <- import("anndata")

# Read the downloaded h5ad file into an AnnData object.
adata <- anndata$read_h5ad("tirosh_adata.h5ad")
Load the trained ikarus model
# Import the Python ikarus package through reticulate.
ikarus <- import("ikarus")

# Construct the classifier from the downloaded gene signatures; "ikarus_path"
# is passed as the output directory.
model <- ikarus$classifier$Ikarus(
  signatures_gmt = "signatures.gmt",
  out_dir = "ikarus_path"
)

# Load the pre-trained core model into the classifier.
model$load_core_model("core_model.joblib")
Predict the cell class
# Run the classifier on the AnnData object; "test" is the name given to this
# run.
class_predict <- model$predict(adata, "test")
Look at the prediction results
# Pair the annotated class (tier_0 column of adata$obs) with the prediction.
results <- data.frame(true = adata$obs$tier_0, pred = class_predict)

# Confusion table: annotated class in rows, predicted class in columns.
with(
  results,
  table(true, pred)
)
pred
true Normal Tumor
Normal 3362 1
Tumor 8 2207
MISC helpers
Seurat to SingleCellExperiment
# Build a SingleCellExperiment from a Seurat object. `seu` is not created in
# this snippet; it must be an existing Seurat object with an "RNA" assay.
library(Seurat)
library(SingleCellExperiment)

sce <- SingleCellExperiment(
  # list() is closed on this line so that colData is an argument of
  # SingleCellExperiment() rather than a second element of the assay list.
  # slot/assay are passed by name so they do not depend on argument order.
  assays = list(counts = GetAssayData(seu, slot = "counts", assay = "RNA")),
  # Per-cell metadata of the Seurat object becomes the column data.
  colData = DataFrame(seu@meta.data)
)
Convert SingleCellExperiment to
AnnData
library(basilisk)

# Convert the SingleCellExperiment `sce` to AnnData inside the conda
# environment at `conda_path`.
# NOTE(review): the function returns a Python-backed AnnData object; confirm
# that it is still usable in the calling session after basiliskRun() returns.
adata <- basiliskRun(fun = function(sce) {
  # Convert SCE to AnnData. The call is namespaced because zellkonverter is
  # never attached, and the function handed to basiliskRun() should not rely
  # on packages attached in the calling session.
  zellkonverter::SCE2AnnData(sce)
}, env = conda_path, sce = sce)
Process count data using ikarus
If the input adata object contains raw counts, the
matrix can be processed using the ikarus preprocess_adata
function.
If the data is already normalized, no preprocessing is required.
IMPORTANT: the data should not be scaled!
# Preprocess the AnnData object with ikarus; the result replaces `adata`.
adata <- ikarus$data$preprocess_adata(adata)
LS0tDQp0aXRsZTogImlrYXJ1cyBjYW5jZXIgY2VsbCBjbGFzc2lmaWNhdGlvbiBpbiBSIg0KYXV0aG9yOiAiVmVkcmFuIEZyYW5rZSINCmVtYWlsOiAidmVkcmFuLmZyYW5rZUBtZGMtYmVybGluLmRlIg0KZGF0ZTogInRvZGF5Ig0Kb3V0cHV0OiANCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0b2M6IHRydWUNCi0tLQ0KDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCiMgUHJlbGltaW5hcmllcw0KDQojIyBTZXR1cCB0aGUgY29uZGEgZW52aXJvbm1lbnQNCg0KSGVyZSB3ZSBpbnN0YWxsIHRoZSBhcHBvcHJpYXRlIHB5dGhvbiB2ZXJzaW9uLCBhbG9uZyB3aXRoIGFsbA0Kb2YgdGhlIHJlcXVpcmVkIFIgYW5kIHB5dGhvbiBwYWNrYWdlcy4NCg0KYGBge3B5dGhvbiwgZXZhbD1GQUxTRX0NCmNvbmRhIGNyZWF0ZSAgLXkgLW4gaWthcjMuOCAtcHl0aG9uPTMuOA0KY29uZGEgaW5zdGFsbCAteSBsaWJnY2MNCmNvbmRhIGluc3RhbGwgLXkgci1lc3NlbnRpYWxzIHItYmFzZQ0KY29uZGEgaW5zdGFsbCAteSAtYyBiaW9jb25kYSBiaW9jb25kdWN0b3ItYmlvY2luc3RhbGxlciBiaW9jb25kdWN0b3ItemVsbGtvbnZlcnRlciBiaW9jb25kdWN0b3Itc2luZ2xlY2VsbGV4cGVyaW1lbnQNCmNvbmRhIGluc3RhbGwgLXkgci1zZXVyYXQNCnBpcCBpbnN0YWxsIGdpdCtodHRwczovL2dpdGh1Yi5jb20vQklNU0JiaW9pbmZvL2lrYXJ1cy5naXQNCmBgYA0KDQoNCiMgTG9hZCB0aGUgY29uZGEgZW52aXJvbm1lbnQgaW4gUg0KDQpTdGFydCB0aGUgUiBpbnRlcnByZXRlciwgbG9hZCAqKnJldGljdWxhdGUqKiwgYW5kDQpsb2FkIHRoZSBjb25kYSBlbnZpcm9ubWVudC4NCg0KKipjb25kYV9wYXRoKiogc2hvdWxkIGNvbnRhaW4gdGhlIHBhdGggdG8gdGhlIGluc3RhbGxlZCBjb25kYSBlbnZpcm9ubWVudC4NCg0KYGBge3IgbG9hZF9yZXRpY3VsYXRlfQ0KbGlicmFyeShyZXRpY3VsYXRlKQ0KY29uZGFfcGF0aCA9IGMoIn4vYmluL1NvZnR3YXJlL21pbmljb25kYTMvZW52cy9pa2FyMy44IikNCnVzZV9jb25kYWVudihjb25kYV9wYXRoKQ0KYGBgDQoNCg0KIyBEYXRhIHByZXBhcmF0aW9uDQoNCiMjIERvd25sb2FkIHRoZSBpa2FydXMgbW9kZWwNCg0KYGBge3IgZG93bmxvYWRfbW9kZWx9DQp0cmFpbmVkX21vZGVsX3BhdGggPSAiaHR0cHM6Ly9naXRodWIuY29tL0JJTVNCYmlvaW5mby9pa2FydXMvcmF3L21hc3Rlci9vdXRfdHV0b3JpYWwvY29yZV9tb2RlbC5qb2JsaWIiDQoNCmRvd25sb2FkLmZpbGUodHJhaW5lZF9tb2RlbF9wYXRoLCAiY29yZV9tb2RlbC5qb2JsaWIiKQ0KYGBgDQoNCiMjIyBEb3dubG9hZCBnZW5lIHNpZ25hdHVyZXMNCg0KYGBge3IgZG93bmxvYWRfc2lnbmF0dXJlc30NCnNpZ25hdHVyZXNfcGF0aCA9ICJodHRwczovL2dpdGh1Yi5jb20vQklNU0JiaW9pbmZvL2lrYXJ1cy9yYXcvbWFzdGVyL291dF90dXRvcmlhbC9zaWduYXR1cmVzLmdtdCINCmRv
d25sb2FkLmZpbGUoc2lnbmF0dXJlc19wYXRoLCAic2lnbmF0dXJlcy5nbXQiKQ0KYGBgDQoNCiMgUnVuIElrYXJ1cw0KDQojIyBEb3dubG9hZCB0aGUgYWRhdGENCg0KYGBge3IgZG93bmxvYWRfZGF0YX0NCmFkYXRhX3BhdGggPSAiaHR0cHM6Ly9iaW1zYnN0YXRpYy5tZGMtYmVybGluLmRlL2FrYWxpbi9Ja2FydXMvcGFydF8xL2RhdGEvdGlyb3NoMTdfaGVhZG5lY2svYWRhdGEuaDVhZCINCmRvd25sb2FkLmZpbGUoYWRhdGFfcGF0aCwgInRpcm9zaF9hZGF0YS5oNWFkIikNCmBgYA0KDQojIyBSZWFkIHRoZSBoNWFkIGludG8gYW4gYW5uZGF0YSBvYmplY3QNCg0KYGBge3IgcmVhZF9hbm5kYXRhfQ0KYW5uZGF0YSA9IGltcG9ydCgiYW5uZGF0YSIpDQphZGF0YSA9IGFubmRhdGEkcmVhZF9oNWFkKCJ0aXJvc2hfYWRhdGEuaDVhZCIpDQpgYGANCg0KIyMgTG9hZCB0aGUgdHJhaW5lZCBpa2FydXMgbW9kZWwNCg0KYGBge3IgbG9hZF9pa2FydXN9DQppa2FydXMgPSBpbXBvcnQoImlrYXJ1cyIpDQptb2RlbCAgPSBpa2FydXMkY2xhc3NpZmllciRJa2FydXMoDQogICAgIA0KICBzaWduYXR1cmVzX2dtdCA9IGZpbGUucGF0aCgic2lnbmF0dXJlcy5nbXQiKSwgDQogIG91dF9kaXI9ImlrYXJ1c19wYXRoIg0KKQ0KbW9kZWwkbG9hZF9jb3JlX21vZGVsKCJjb3JlX21vZGVsLmpvYmxpYiIpDQpgYGANCg0KIyMgUHJlZGljdCB0aGUgY2VsbCBjbGFzcw0KDQpgYGB7ciBwcmVkaWN0X2NlbGxfY2xhc3N9DQpjbGFzc19wcmVkaWN0ID0gbW9kZWwkcHJlZGljdChhZGF0YSwgInRlc3QiKQ0KYGBgDQoNCiMjIExvb2sgYXQgdGhlIHByZWRpY3Rpb24gcmVzdWx0cw0KDQpgYGB7ciBjb21wYXJlX3Jlc3VsdHN9DQpyZXN1bHRzID0gZGF0YS5mcmFtZSgNCiAgdHJ1ZSA9IGFkYXRhJG9icyR0aWVyXzAsDQogIHByZWQgPSBjbGFzc19wcmVkaWN0DQopDQp3aXRoKHJlc3VsdHMsIHRhYmxlKHRydWUsIHByZWQpKQ0KDQpgYGANCg0KDQojIE1JU0MgaGVscGVycw0KDQojIyBTZXVyYXQgdG8gU2luZ2xlQ2VsbEV4cGVyaW1lbnQNCg0KYGBge3Igc2V1cmF0X3RvX3NpbmdsZUNlbGxFeHBlcmltZW50LCBldmFsPUZBTFNFfQ0Kc2NlID0gU2luZ2xlQ2VsbEV4cGVyaW1lbnQoDQogIGFzc2F5cyA9IGxpc3QoY291bnRzID0gR2V0QXNzYXlEYXRhKHNldSwgImNvdW50cyIsICJSTkEiKSwNCiAgY29sRGF0YSA9IERhdGFGcmFtZShzZXVAbWV0YS5kYXRhKQ0KKSANCmBgYA0KDQoNCiMjIENvbnZlcnQgKipTaW5nbGVDZWxsRXhwZXJpbWVudCoqIHRvICoqQW5uRGF0YSoqDQoNCg0KYGBge3IsIHNpbmdsZWNlbGxleHBlcmltZW50X3RvX2FubmRhdGEsIGV2YWw9RkFMU0V9DQpsaWJyYXJ5KGJhc2lsaXNrKQ0KYWRhdGEgPSBiYXNpbGlza1J1bihmdW4gPSBmdW5jdGlvbihzY2UpIHsNCiAgICAgIyBDb252ZXJ0IFNDRSB0byBBbm5EYXRhOg0KICAgICBTQ0UyQW5uRGF0YShzY2UpDQogICAgIA0KfSwgZW52ID0gY29uZGFfcGF0aCwgc2NlID0gc2NlKQ0KYGBgDQoNCiMjIFBy
b2Nlc3MgY291bnQgZGF0YSB1c2luZyBpa2FydXMNCg0KSWYgdGhlIGlucHV0ICoqYWRhdGEqKiBvYmplY3QgY29udGFpbnMgY291bnRzLCB0aGUgbWF0cml4DQpjYW4gYmUgcHJvY2Vzc2VkIHVzaW5nIGlrYXJ1cyAqKnByZXByb2Nlc3NfYWRhdGEqKiBmdW5jdGlvbi4NCg0KSWYgdGhlIGRhdGEgaXMgYWxyZWFkeSBub3JtYWxpemVkLCBubyBwcmVwcm9jZXNzaW5nIGlzIHJlcXVpcmVkDQoNCioqSU1QT1JUQU5UKiogdGhlIGRhdGEgc2hvdWxkIG5vdCBiZSBzY2FsZWQhDQoNCmBgYHtyIHByb2Nlc3NfZGF0YSwgZXZhbD1GQUxTRX0NCmFkYXRhID0gaWthcnVzJGRhdGEkcHJlcHJvY2Vzc19hZGF0YShhZGF0YSkNCmBgYA0KDQoNCg0K