SCA_scRNASEQ_TISSUE_FETAL_LIVER <- readRDS("G:/SCA_scRNASEQ_TISSUE_FETAL_LIVER.RDS")
install.packages("SeuratObject")
SCA_CyTOF_TISSUE_WHOLE_BLOOD_SDY1389_Whole_Blood <- readRDS("G:/Erica/test/SCA_CyTOF_TISSUE_WHOLE_BLOOD_SDY1389_Whole_Blood.RDS")
install.packages("Matrix")
install.packages("NMF")
library(NMF)
install.packages ("Biobase")
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("Biobase")
BiocManager::install("Biobase")
BiocManager::install("Biobase")
BiocManager::install("Biobase")
BiocManager::install("Biobase")
BiocManager::install("BiocGenerics")
BiocManager::install("BiocGenerics")
BiocManager::install("BiocGenerics")
BiocManager::install("BiocGenerics")
help("sys.on.exit")
browser()
a=1
b=2
browser()
a
system.exit
exit
# 假设我们有一个函数，我们怀疑它在处理某些输入时会出错
my_function <- function(x) {
y <- x * 2
browser() # 进入调试模式
z <- y + 10
return(z)
}
my_function(5)
ls()
x
x==6
ls()
ls()
z
data_matrix <- matrix(rnorm(100), nrow=10)
rownames(data_matrix) <- paste("Metabolite", 1:10)
colnames(data_matrix) <- paste("Sample", 1:10)
data = data_matrix
data
ranks <- apply(data, 2, rank, ties.method = "average")
ranks
avg_ranks <- rowMeans(data[ranks])
avg_ranks <- rowMeans(ranks)
avg_ranks
normalized_data <- matrix(avg_ranks, nrow = nrow(data), byrow = TRUE)
normalized_data
for (i in 1:ncol(data)) {
normalized_data[, i] <- normalized_data[, i][order(ranks[, i])]
}
for (i in 1:ncol(data)) {
normalized_data[, i] <- data[, i][order(ranks[, i])]
}
for (i in 1:ncol(data)) {
print( data[, i][order(ranks[, i])])
}
ranks <- apply(data, 2, rank, ties.method = "average")
ranks
avg_ranks <- rowMeans(data[ranks])
avg_ranks <- apply(ranks, 1, function(x) mean(data[x]))
avg_ranks
normalized_data <- matrix(avg_ranks, nrow = nrow(data), byrow = TRUE)
normalized_data
for (i in 1:ncol(data)) {
normalized_data[, i] <- normalized_data[, i][order(ranks[, i])]
}
normalized_data <- matrix(avg_ranks, nrow = nrow(data), ncol = ncol(data), byrow = FALSE)
normalized_data
avg_ranks
for (i in 1:ncol(data)) {
normalized_data[, i] <- normalized_data[order(ranks[, i]), i]
}
normalized_data
data_matrix <- matrix(rnorm(100), nrow=10)
rownames(data_matrix) <- paste("Metabolite", 1:10)
colnames(data_matrix) <- paste("Sample", 1:10)
# 打印原始数据矩阵
print("Original Data Matrix:")
print(data_matrix)
data = data_matrix
ranks <- apply(data, 2, rank, ties.method = "average")
# 计算排序后的平均值
avg_ranks <- apply(ranks, 1, function(x) mean(data[x]))
avg-ranks
avg_ranks
normalized_data <- matrix(avg_ranks, nrow = nrow(data), ncol = ncol(data), byrow = FALSE)
normalized_data
help("prcomp")
help(c)
help("list")
install.packages("ggfortify")
install.packages("ggplot2")
setwd("G:/MetaboLights/test/workflow_test")
library(ggfortify) # 用于自动绘制prcomp对象的图
library(ggplot2)
x  = read.csv("./norm.csv", row.names = 1, check.names = FALSE)
x
setwd("G:/MetaboLights/test/workflow_test")
library(ggfortify) # 用于自动绘制prcomp对象的图
library(ggplot2)   # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
data <- x[, -c("sample_name","class","color")];
pca = function(x) {
library(ggfortify) # 用于自动绘制prcomp对象的图
library(ggplot2)   # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# data <- x[, -c("sample_name","class","color")];
data <- as.data.frame(x);
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
data[, "color"] = NULL;
data_pca <- prcomp(data, scale. = TRUE, rank = 3) # 进行PCA分析，排除最后一列（物种）
# 查看PCA结果
summary(data_pca)
# 使用ggfortify包自动绘制PCA图
autoplot(data_pca, data = x, colour = 'color', label = TRUE, shape = FALSE)
# 或者使用ggplot2自定义绘图
pca_data <- as.data.frame(data_pca$x) # 转换PCA结果为数据框
pca_data$class <- x$class      # 添加物种信息列
ggplot(pca_data, aes(x = PC1, y = PC2, color = class)) +
geom_point() +  # 绘制点
theme_minimal() +  # 使用简洁主题
labs(color = 'Sample Class') +  # 添加图例标题
ggtitle("PCA of Expression Data") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
}
pca(x)
pca = function(x) {
library(ggfortify) # 用于自动绘制prcomp对象的图
library(ggplot2)   # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# data <- x[, -c("sample_name","class","color")];
data <- as.data.frame(x);
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
data[, "color"] = NULL;
data_pca <- prcomp(data, scale. = TRUE, rank = 3) # 进行PCA分析，排除最后一列（物种）
# 查看PCA结果
summary(data_pca)
# 使用ggfortify包自动绘制PCA图，并添加置信区间椭圆
autoplot(data_pca, data = x, colour = 'class', label = TRUE, shape = FALSE) +
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", alpha = 0.2) +
theme_minimal() +
labs(color = 'class') +
ggtitle("PCA of Iris Data with Confidence Ellipses") +
theme(legend.position = "bottom")
}
pca(x)
pca = function(x) {
library(ggfortify) # 用于自动绘制prcomp对象的图
library(ggplot2)   # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# data <- x[, -c("sample_name","class","color")];
data <- as.data.frame(x);
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
data[, "color"] = NULL;
data_pca <- prcomp(data, scale. = TRUE, rank = 3) # 进行PCA分析，排除最后一列（物种）
# 查看PCA结果
summary(data_pca)
# 将PCA结果转换为数据框，并添加物种信息
pca_data <- as.data.frame(data_pca$x)
pca_data$class <- x$class
# 使用ggplot2自定义绘图，并为每个species类别添加置信区间椭圆
ggplot(pca_data, aes(x = PC1, y = PC2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PCA of Expression Data with Confidence Ellipses by Species") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
}
pca(x)
pca = function(x) {
library(ggfortify) # 用于自动绘制prcomp对象的图
library(ggplot2)   # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# data <- x[, -c("sample_name","class","color")];
data <- as.data.frame(x);
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
data[, "color"] = NULL;
data_pca <- prcomp(data, scale. = TRUE, rank = 3) # 进行PCA分析，排除最后一列（物种）
# 查看PCA结果
summary(data_pca)
# 将PCA结果转换为数据框，并添加物种信息
pca_data <- as.data.frame(data_pca$x)
pca_data$class <- x$class
# 使用ggplot2自定义绘图，并为每个species类别添加置信区间椭圆
ggplot(pca_data, aes(x = PC1, y = PC2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", alpha = 0.2, fill = NA) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PCA of Expression Data with Confidence Ellipses by Species") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
}
pca(x)
install.packages("mixOmics"0)
install.packages("mixOmics")
install.packages("ropls")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("ropls")
pca(x)
x
plsda = function(x) {
# 载入所需的R包
library(ropls) # 用于PLS-DA分析
library(ggplot2) # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# 由于iris数据集是连续的，我们需要将其转换为分类变量
data = as.data.frame(x);
data_class <- as.factor(data$class)
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
# 进行PLS-DA分析
# 首先，我们需要将数据转换为ropls所需的格式
data_pls <- opls(data = data, class = data_class, predI = 3)
# 提取PLS-DA得分
plsda_scores <- as.data.frame(data_pls@scores$X)
# 将PLS-DA得分和物种信息合并为一个数据框
plsda_data <- data.frame(class = data_class, plsda_scores)
# 使用ggplot2自定义绘图
ggplot(plsda_data, aes(x = Score1, y = Score2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", fill = NA, alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PLS-DA of Expression Data with Confidence Ellipses by Class") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
}
plsda(x)
help(opls)
plsda = function(x) {
# 载入所需的R包
library(ropls) # 用于PLS-DA分析
library(ggplot2) # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# 由于iris数据集是连续的，我们需要将其转换为分类变量
data = as.data.frame(x);
data_class <- as.factor(data$class)
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
# 进行PLS-DA分析
# 首先，我们需要将数据转换为ropls所需的格式
data_pls <- opls(data, y = data_class, predI = 3)
# 提取PLS-DA得分
plsda_scores <- as.data.frame(data_pls@scores$X)
# 将PLS-DA得分和物种信息合并为一个数据框
plsda_data <- data.frame(class = data_class, plsda_scores)
# 使用ggplot2自定义绘图
ggplot(plsda_data, aes(x = Score1, y = Score2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", fill = NA, alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PLS-DA of Expression Data with Confidence Ellipses by Class") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
}
plsda(x)
data = as.data.frame(x);
data_class <- as.factor(data$class)
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
data
data_pls <- opls(data, y = data_class, predI = 3)
data_pls
summary( data_pls)
str( data_pls)
plsda_scores <- as.data.frame(data_pls@scoreMN$X)
plsda_scores <- as.data.frame(data_pls@scoreMN)
plsda_scores
plsda_scores <- as.data.frame(data_pls@scoreMN);
plsda_scores <- data.frame(
Score1 = plsda_scores$p1,
Score2 = plsda_scores$p2,
Score3 = plsda_scores$p3,
);
plsda_scores
plsda_scores <- data.frame(
Score1 = plsda_scores$p1,
Score2 = plsda_scores$p2,
Score3 = plsda_scores$p3
);
plsda_data <- data.frame(class = data_class, plsda_scores)
ggplot(plsda_data, aes(x = Score1, y = Score2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", fill = NA, alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PLS-DA of Expression Data with Confidence Ellipses by Class") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
plsda = function(x, oplsda = FALSE) {
# 载入所需的R包
library(ropls) # 用于PLS-DA分析
library(ggplot2) # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# 由于iris数据集是连续的，我们需要将其转换为分类变量
data = as.data.frame(x);
data_class <- as.factor(data$class)
data_pls <- NULL;
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
# 进行PLS-DA分析
# 首先，我们需要将数据转换为ropls所需的格式
if (oplsda) {
data_pls <- opls(data, y = data_class, predI = 3, orthoI = NA)
} else {
data_pls <- opls(data, y = data_class, predI = 3)
}
# 提取PLS-DA得分
plsda_scores <- as.data.frame(data_pls@scoreMN);
plsda_scores <- data.frame(
Score1 = plsda_scores$p1,
Score2 = plsda_scores$p2,
Score3 = plsda_scores$p3
);
# 将PLS-DA得分和物种信息合并为一个数据框
plsda_data <- data.frame(class = data_class, plsda_scores)
# 使用ggplot2自定义绘图
ggplot(plsda_data, aes(x = Score1, y = Score2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", fill = NA, alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PLS-DA of Expression Data with Confidence Ellipses by Class") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
}
plsda(x)
plsda(x, oplsda = TRUE)
help(svg)
plsda = function(x, oplsda = FALSE) {
# 载入所需的R包
library(ropls) # 用于PLS-DA分析
library(ggplot2) # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# 由于iris数据集是连续的，我们需要将其转换为分类变量
data = as.data.frame(x);
data_class <- as.factor(data$class)
data_pls <- NULL;
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
# 进行PLS-DA分析
svg(filename = "ropls.svg");
if (oplsda) {
data_pls <- opls(data, y = data_class, predI = 3, orthoI = NA)
} else {
data_pls <- opls(data, y = data_class, predI = 3)
}
dev.off();
# 提取PLS-DA得分
plsda_scores <- as.data.frame(data_pls@scoreMN);
plsda_scores <- data.frame(
Score1 = plsda_scores$p1,
Score2 = plsda_scores$p2,
Score3 = plsda_scores$p3
);
# 将PLS-DA得分和物种信息合并为一个数据框
plsda_data <- data.frame(class = data_class, plsda_scores)
svg(filename = "plsda.svg");
# 使用ggplot2自定义绘图
ggplot(plsda_data, aes(x = Score1, y = Score2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", fill = NA, alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PLS-DA of Expression Data with Confidence Ellipses by Class") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
dev.off();
}
oplsda = function(x) {
workdir = getwd();
class = unique(x$class);
sample_class = x$class;
for(class_name in class) {
v <- as.character(sample_class);
v[v != class_name] = "Other";
x[,"class"] = v;
dir.create(class_name);
setwd(class_name);
plsda(x, oplsda = TRUE);
setwd(workdir);
}
}
oplsda(x)
sample_class = x$class;
class_name = sample_class[1]
class_name
v <- as.character(sample_class);
v[v != class_name] = "Other";
x[,"class"] = v;
data = as.data.frame(x);
data_class <- as.factor(data$class)
data_pls <- NULL;
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
data = as.data.frame(x);
data_class <- as.factor(data$class)
data_pls <- NULL;
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
head(data)
data[, "color"] = NULL;
View(data)
data_pls <- opls(data, y = data_class, predI = 3, orthoI = NA)
str(data_pls)
plsda_scores <- as.data.frame(data_pls@orthoScoreMN);
plsda_scores
plsda_scores <- data.frame(
Score1 = plsda_scores$o1,
Score2 = plsda_scores$o2,
Score3 = plsda_scores$o3
);
plsda_data <- data.frame(class = data_class, plsda_scores)
ggplot(plsda_data, aes(x = Score1, y = Score2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", fill = NA, alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PLS-DA of Expression Data with Confidence Ellipses by Class") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
plsda = function(x, oplsda = FALSE) {
# 载入所需的R包
library(ropls) # 用于PLS-DA分析
library(ggplot2) # 用于自定义绘图
# 假设你已经有了一个名为data的数据框，其中包含了你的代谢组学数据
# data <- read.csv("path_to_your_data.csv", row.names = 1)
# 由于iris数据集是连续的，我们需要将其转换为分类变量
data = as.data.frame(x);
data_class <- as.factor(data$class)
data_pls <- NULL;
data[, "color"] = NULL;
data[, "sample_name"] = NULL;
data[, "class"] = NULL;
# 进行PLS-DA分析
svg(filename = "ropls.svg");
if (oplsda) {
data_pls <- opls(data, y = data_class, predI = 3, orthoI = NA)
} else {
data_pls <- opls(data, y = data_class, predI = 3)
}
dev.off();
if (oplsda) {
# 提取PLS-DA得分
plsda_scores <- as.data.frame(data_pls@orthoScoreMN);
plsda_scores <- data.frame(
Score1 = plsda_scores$o1,
Score2 = plsda_scores$o2,
Score3 = plsda_scores$o3
);
} else {
# 提取PLS-DA得分
plsda_scores <- as.data.frame(data_pls@scoreMN);
plsda_scores <- data.frame(
Score1 = plsda_scores$p1,
Score2 = plsda_scores$p2,
Score3 = plsda_scores$p3
);
}
# 将PLS-DA得分和物种信息合并为一个数据框
plsda_data <- data.frame(class = data_class, plsda_scores)
svg(filename = "plsda.svg");
# 使用ggplot2自定义绘图
ggplot(plsda_data, aes(x = Score1, y = Score2, color = class, group = class)) +
geom_point() +  # 绘制点
stat_ellipse(type = "norm", level = 0.95, geom = "polygon", fill = NA, alpha = 0.2) +  # 添加置信区间椭圆
theme_minimal() +  # 使用简洁主题
labs(color = 'class') +  # 添加图例标题
ggtitle("PLS-DA of Expression Data with Confidence Ellipses by Class") +  # 添加图表标题
theme(legend.position = "bottom")  # 将图例放在底部
dev.off();
}
oplsda = function(x) {
workdir = getwd();
class = unique(x$class);
sample_class = x$class;
for(class_name in class) {
v <- as.character(sample_class);
v[v != class_name] = "Other";
x[,"class"] = v;
dir.create(class_name);
setwd(class_name);
plsda(x, oplsda = TRUE);
setwd(workdir);
}
}
x = read.csv("./norm.csv", row.names = 1, check.names = FALSE)
setwd("G:/MetaboLights/test/workflow_test")
x = read.csv("./norm.csv", row.names = 1, check.names = FALSE)
oplsda(x)
