Preliminaries

Set up the conda environment

Here we install the appropriate Python version, along with all of the required R and Python packages.

# Create a dedicated environment with Python 3.8. The package spec is
# "python=3.8" with no leading dash; "-python=3.8" is parsed as an option.
conda create -y -n ikar3.8 python=3.8
# Activate the new environment so the installs below go into it rather than
# into whichever environment happens to be active.
conda activate ikar3.8
conda install -y libgcc
conda install -y r-essentials r-base
conda install -y -c bioconda bioconductor-biocinstaller bioconductor-zellkonverter bioconductor-singlecellexperiment
conda install -y r-seurat
# Install ikarus itself straight from its GitHub repository.
pip install git+https://github.com/BIMSBbioinfo/ikarus.git

Load the conda environment in R

Start the R interpreter, load reticulate, and load the conda environment.

conda_path should contain the path to the installed conda environment.

library(reticulate)

# Path to the conda environment created during setup; adjust it to match
# the location of your own conda installation.
conda_path <- "~/bin/Software/miniconda3/envs/ikar3.8"
use_condaenv(condaenv = conda_path)

Data preparation

Download the ikarus model

# URL of the pre-trained ikarus core model (a binary joblib file).
trained_model_path <- "https://github.com/BIMSBbioinfo/ikarus/raw/master/tutorials/out/core_model.joblib"

# mode = "wb" forces a binary transfer. Without it, Windows downloads in
# text mode, which can corrupt binary files such as this one.
download.file(trained_model_path, "core_model.joblib", mode = "wb")

Download gene signatures

# URL of the gene signature file (GMT format) shipped with the ikarus
# tutorials; it is saved into the current working directory.
signatures_path <- "https://github.com/BIMSBbioinfo/ikarus/raw/master/tutorials/out/signatures.gmt"
download.file(url = signatures_path, destfile = "signatures.gmt")

Run Ikarus

Download the adata

# URL of the example dataset, stored as an .h5ad (HDF5) file.
adata_path <- "https://bimsbstatic.mdc-berlin.de/akalin/Ikarus/part_1/data/tirosh17_headneck/adata.h5ad"
# mode = "wb" forces a binary transfer. Without it, Windows downloads in
# text mode, which can corrupt the HDF5 file.
download.file(adata_path, "tirosh_adata.h5ad", mode = "wb")

Read the h5ad into an anndata object

# Import the Python anndata module and use it to read the downloaded
# .h5ad file.
anndata <- import("anndata")
adata <- anndata$read_h5ad("tirosh_adata.h5ad")

Load the trained ikarus model

# Import the Python ikarus module.
ikarus <- import("ikarus")

# Construct the classifier from the downloaded gene signatures, then load
# the downloaded core model into it.
model <- ikarus$classifier$Ikarus(
  signatures_gmt = "signatures.gmt",
  out_dir = "ikarus_path"
)
model$load_core_model("core_model.joblib")

Predict the cell class

# Run the loaded model on the AnnData object.
# NOTE(review): "test" is presumably a label for this dataset/run; confirm
# against the ikarus documentation.
class_predict <- model$predict(adata, "test")

Look at the prediction results

# Pair the annotation stored in adata$obs$tier_0 with the predictions, then
# cross-tabulate the two columns.
results <- data.frame(
  true = adata$obs$tier_0,
  pred = class_predict
)
table(true = results$true, pred = results$pred)
        pred
true     Normal Tumor
  Normal   3362     1
  Tumor       8  2207

MISC helpers

Seurat to SingleCellExperiment

# Build a SingleCellExperiment from a Seurat object `seu`: the raw counts of
# the "RNA" assay become the `counts` assay, and the Seurat cell metadata
# becomes the column data.
# The GetAssayData arguments are passed by name because their positional
# order is not the same across Seurat versions.
sce <- SingleCellExperiment(
  assays = list(
    counts = GetAssayData(seu, slot = "counts", assay = "RNA")
  ),
  colData = DataFrame(seu@meta.data)
)

Convert SingleCellExperiment to AnnData

library(basilisk)

# Convert the SingleCellExperiment `sce` to an AnnData object inside the
# conda environment given by `conda_path`.
# SCE2AnnData is namespace-qualified so the call does not depend on
# zellkonverter being attached in the session that evaluates the function.
# NOTE(review): basilisk's documentation recommends returning only pure R
# objects from basiliskRun; confirm that the returned AnnData reference
# remains usable in your setup.
adata <- basiliskRun(
  fun = function(sce) {
    zellkonverter::SCE2AnnData(sce)
  },
  env = conda_path,
  sce = sce
)

Process count data using ikarus

If the input adata object contains raw counts, the matrix can be processed using the ikarus preprocess_adata function.

If the data is already normalized, no preprocessing is required.

IMPORTANT: the data should not be scaled!

# Pass the AnnData object through ikarus' preprocessing helper and keep the
# returned object.
adata <- ikarus$data$preprocess_adata(adata)
LS0tDQp0aXRsZTogImlrYXJ1cyBjYW5jZXIgY2VsbCBjbGFzc2lmaWNhdGlvbiBpbiBSIg0KYXV0aG9yOiAiVmVkcmFuIEZyYW5rZSINCmVtYWlsOiAidmVkcmFuLmZyYW5rZUBtZGMtYmVybGluLmRlIg0KZGF0ZTogInRvZGF5Ig0Kb3V0cHV0OiANCiAgaHRtbF9ub3RlYm9vazoNCiAgICB0b2M6IHRydWUNCi0tLQ0KDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCiMgUHJlbGltaW5hcmllcw0KDQojIyBTZXR1cCB0aGUgY29uZGEgZW52aXJvbm1lbnQNCg0KSGVyZSB3ZSBpbnN0YWxsIHRoZSBhcHBvcHJpYXRlIHB5dGhvbiB2ZXJzaW9uLCBhbG9uZyB3aXRoIGFsbA0Kb2YgdGhlIHJlcXVpcmVkIFIgYW5kIHB5dGhvbiBwYWNrYWdlcy4NCg0KYGBge3B5dGhvbiwgZXZhbD1GQUxTRX0NCmNvbmRhIGNyZWF0ZSAgLXkgLW4gaWthcjMuOCAtcHl0aG9uPTMuOA0KY29uZGEgaW5zdGFsbCAteSBsaWJnY2MNCmNvbmRhIGluc3RhbGwgLXkgci1lc3NlbnRpYWxzIHItYmFzZQ0KY29uZGEgaW5zdGFsbCAteSAtYyBiaW9jb25kYSBiaW9jb25kdWN0b3ItYmlvY2luc3RhbGxlciBiaW9jb25kdWN0b3ItemVsbGtvbnZlcnRlciBiaW9jb25kdWN0b3Itc2luZ2xlY2VsbGV4cGVyaW1lbnQNCmNvbmRhIGluc3RhbGwgLXkgci1zZXVyYXQNCnBpcCBpbnN0YWxsIGdpdCtodHRwczovL2dpdGh1Yi5jb20vQklNU0JiaW9pbmZvL2lrYXJ1cy5naXQNCmBgYA0KDQoNCiMgTG9hZCB0aGUgY29uZGEgZW52aXJvbm1lbnQgaW4gUg0KDQpTdGFydCB0aGUgUiBpbnRlcnByZXRlciwgbG9hZCAqKnJldGljdWxhdGUqKiwgYW5kDQpsb2FkIHRoZSBjb25kYSBlbnZpcm9ubWVudC4NCg0KKipjb25kYV9wYXRoKiogc2hvdWxkIGNvbnRhaW4gdGhlIHBhdGggdG8gdGhlIGluc3RhbGxlZCBjb25kYSBlbnZpcm9ubWVudC4NCg0KYGBge3IgbG9hZF9yZXRpY3VsYXRlfQ0KbGlicmFyeShyZXRpY3VsYXRlKQ0KY29uZGFfcGF0aCA9IGMoIn4vYmluL1NvZnR3YXJlL21pbmljb25kYTMvZW52cy9pa2FyMy44IikNCnVzZV9jb25kYWVudihjb25kYV9wYXRoKQ0KYGBgDQoNCg0KIyBEYXRhIHByZXBhcmF0aW9uDQoNCiMjIERvd25sb2FkIHRoZSBpa2FydXMgbW9kZWwNCg0KYGBge3IgZG93bmxvYWRfbW9kZWx9DQp0cmFpbmVkX21vZGVsX3BhdGggPSAiaHR0cHM6Ly9naXRodWIuY29tL0JJTVNCYmlvaW5mby9pa2FydXMvcmF3L21hc3Rlci9vdXRfdHV0b3JpYWwvY29yZV9tb2RlbC5qb2JsaWIiDQoNCmRvd25sb2FkLmZpbGUodHJhaW5lZF9tb2RlbF9wYXRoLCAiY29yZV9tb2RlbC5qb2JsaWIiKQ0KYGBgDQoNCiMjIyBEb3dubG9hZCBnZW5lIHNpZ25hdHVyZXMNCg0KYGBge3IgZG93bmxvYWRfc2lnbmF0dXJlc30NCnNpZ25hdHVyZXNfcGF0aCA9ICJodHRwczovL2dpdGh1Yi5jb20vQklNU0JiaW9pbmZvL2lrYXJ1cy9yYXcvbWFzdGVyL291dF90dXRvcmlhbC9zaWduYXR1cmVzLmdtdCINCmRv
d25sb2FkLmZpbGUoc2lnbmF0dXJlc19wYXRoLCAic2lnbmF0dXJlcy5nbXQiKQ0KYGBgDQoNCiMgUnVuIElrYXJ1cw0KDQojIyBEb3dubG9hZCB0aGUgYWRhdGENCg0KYGBge3IgZG93bmxvYWRfZGF0YX0NCmFkYXRhX3BhdGggPSAiaHR0cHM6Ly9iaW1zYnN0YXRpYy5tZGMtYmVybGluLmRlL2FrYWxpbi9Ja2FydXMvcGFydF8xL2RhdGEvdGlyb3NoMTdfaGVhZG5lY2svYWRhdGEuaDVhZCINCmRvd25sb2FkLmZpbGUoYWRhdGFfcGF0aCwgInRpcm9zaF9hZGF0YS5oNWFkIikNCmBgYA0KDQojIyBSZWFkIHRoZSBoNWFkIGludG8gYW4gYW5uZGF0YSBvYmplY3QNCg0KYGBge3IgcmVhZF9hbm5kYXRhfQ0KYW5uZGF0YSA9IGltcG9ydCgiYW5uZGF0YSIpDQphZGF0YSA9IGFubmRhdGEkcmVhZF9oNWFkKCJ0aXJvc2hfYWRhdGEuaDVhZCIpDQpgYGANCg0KIyMgTG9hZCB0aGUgdHJhaW5lZCBpa2FydXMgbW9kZWwNCg0KYGBge3IgbG9hZF9pa2FydXN9DQppa2FydXMgPSBpbXBvcnQoImlrYXJ1cyIpDQptb2RlbCAgPSBpa2FydXMkY2xhc3NpZmllciRJa2FydXMoDQogICAgIA0KICBzaWduYXR1cmVzX2dtdCA9IGZpbGUucGF0aCgic2lnbmF0dXJlcy5nbXQiKSwgDQogIG91dF9kaXI9ImlrYXJ1c19wYXRoIg0KKQ0KbW9kZWwkbG9hZF9jb3JlX21vZGVsKCJjb3JlX21vZGVsLmpvYmxpYiIpDQpgYGANCg0KIyMgUHJlZGljdCB0aGUgY2VsbCBjbGFzcw0KDQpgYGB7ciBwcmVkaWN0X2NlbGxfY2xhc3N9DQpjbGFzc19wcmVkaWN0ID0gbW9kZWwkcHJlZGljdChhZGF0YSwgInRlc3QiKQ0KYGBgDQoNCiMjIExvb2sgYXQgdGhlIHByZWRpY3Rpb24gcmVzdWx0cw0KDQpgYGB7ciBjb21wYXJlX3Jlc3VsdHN9DQpyZXN1bHRzID0gZGF0YS5mcmFtZSgNCiAgdHJ1ZSA9IGFkYXRhJG9icyR0aWVyXzAsDQogIHByZWQgPSBjbGFzc19wcmVkaWN0DQopDQp3aXRoKHJlc3VsdHMsIHRhYmxlKHRydWUsIHByZWQpKQ0KDQpgYGANCg0KDQojIE1JU0MgaGVscGVycw0KDQojIyBTZXVyYXQgdG8gU2luZ2xlQ2VsbEV4cGVyaW1lbnQNCg0KYGBge3Igc2V1cmF0X3RvX3NpbmdsZUNlbGxFeHBlcmltZW50LCBldmFsPUZBTFNFfQ0Kc2NlID0gU2luZ2xlQ2VsbEV4cGVyaW1lbnQoDQogIGFzc2F5cyA9IGxpc3QoY291bnRzID0gR2V0QXNzYXlEYXRhKHNldSwgImNvdW50cyIsICJSTkEiKSwNCiAgY29sRGF0YSA9IERhdGFGcmFtZShzZXVAbWV0YS5kYXRhKQ0KKSANCmBgYA0KDQoNCiMjIENvbnZlcnQgKipTaW5nbGVDZWxsRXhwZXJpbWVudCoqIHRvICoqQW5uRGF0YSoqDQoNCg0KYGBge3IsIHNpbmdsZWNlbGxleHBlcmltZW50X3RvX2FubmRhdGEsIGV2YWw9RkFMU0V9DQpsaWJyYXJ5KGJhc2lsaXNrKQ0KYWRhdGEgPSBiYXNpbGlza1J1bihmdW4gPSBmdW5jdGlvbihzY2UpIHsNCiAgICAgIyBDb252ZXJ0IFNDRSB0byBBbm5EYXRhOg0KICAgICBTQ0UyQW5uRGF0YShzY2UpDQogICAgIA0KfSwgZW52ID0gY29uZGFfcGF0aCwgc2NlID0gc2NlKQ0KYGBgDQoNCiMjIFBy
b2Nlc3MgY291bnQgZGF0YSB1c2luZyBpa2FydXMNCg0KSWYgdGhlIGlucHV0ICoqYWRhdGEqKiBvYmplY3QgY29udGFpbnMgY291bnRzLCB0aGUgbWF0cml4DQpjYW4gYmUgcHJvY2Vzc2VkIHVzaW5nIGlrYXJ1cyAqKnByZXByb2Nlc3NfYWRhdGEqKiBmdW5jdGlvbi4NCg0KSWYgdGhlIGRhdGEgaXMgYWxyZWFkeSBub3JtYWxpemVkLCBubyBwcmVwcm9jZXNzaW5nIGlzIHJlcXVpcmVkDQoNCioqSU1QT1JUQU5UKiogdGhlIGRhdGEgc2hvdWxkIG5vdCBiZSBzY2FsZWQhDQoNCmBgYHtyIHByb2Nlc3NfZGF0YSwgZXZhbD1GQUxTRX0NCmFkYXRhID0gaWthcnVzJGRhdGEkcHJlcHJvY2Vzc19hZGF0YShhZGF0YSkNCmBgYA0KDQoNCg0K