# ---- Load the normalised table and split it into the two groups ----------
# Reads norm.csv from the project folder (first CSV column becomes the row
# names) and prepares one matrix per group for the comparison `a` vs `b`.
setwd("G:/MetaboLights/test/workflow_test")
x <- read.csv("./norm.csv", check.names = FALSE, row.names = 1)
x
colnames(x)
class <- unique(x$class)
class

# The two class labels being compared; `workdir` names the output folder.
a <- "Healthy"
b <- "Prostate cancer"
workdir <- paste(a, b, sep = " vs ")
workdir

# Logical row masks selecting the rows of each group.
rows_a <- x$class == a
rows_b <- x$class == b
rows_a
rows_b
rownames(x)

i <- x[rows_a, ]
j <- x[rows_b, ]
i
j
colnames(i)

# Drop the metadata columns so that only the remaining columns are kept.
for (meta_col in c("class", "color", "sample_name")) {
  i[, meta_col] <- NULL
  j[, meta_col] <- NULL
}

# Transpose: from here on each row of `i` / `j` is one original column
# and each column is one original row.
i <- t(i)
j <- t(j)
i
j

# Per-row mean and standard deviation within each group.
mean_a <- rowMeans(i)
mean_a
mean_b <- rowMeans(j)
sd_a <- apply(i, MARGIN = 1, FUN = sd)
sd_b <- apply(j, MARGIN = 1, FUN = sd)
sd_a
# ---- Fold change and per-row t-test between the two groups ---------------
# `i` and `j` are the transposed group matrices built above; rows of both
# are compared pairwise by position.
foldchange <- mean_a / mean_b

# Run each test exactly once and keep the full `htest` objects in a list.
# (The earlier sapply() attempt only produced a matrix of list components
# that was immediately overwritten, and `x@p.value` fails because `htest`
# results are plain S3 lists, not S4 objects.)
# t.test() is called with its default settings apart from the explicit
# two-sided alternative. seq_len() keeps the loop empty when there are no rows.
ttests <- lapply(seq_len(nrow(i)), function(offset) {
  t.test(i[offset, ], j[offset, ], alternative = "two.sided")
})

# vapply() guarantees a plain numeric vector, one entry per row.
pvalue <- vapply(ttests, function(res) res$p.value, numeric(1))
pvalue

# `t` holds the test statistics; the name is kept because later steps use it.
t <- vapply(ttests, function(res) unname(res$statistic), numeric(1))
t
# ---- Assemble the result table and write it to <workdir>/ttest.csv -------
# One row per row of `i`; columns are the group means, group standard
# deviations, fold change, log2 fold change, t statistic and p-value.
t <- data.frame(
  mean_a, mean_b, sd_a, sd_b,
  foldchange, log2(foldchange), t, pvalue,
  row.names = rownames(i)
)
colnames(t) <- c(
  sprintf("meanOf_%s", a), sprintf("meanOf_%s", b),
  sprintf("sdOf_%s", a), sprintf("sdOf_%s", b),
  "foldchange", "log2FC", "t", "p.value"
)
t
workdir

# The output folder must exist before write.csv() can open a file in it.
# showWarnings = FALSE keeps re-runs quiet when the folder is already there.
dir.create(workdir, showWarnings = FALSE, recursive = TRUE)
write.csv(t, file = file.path(workdir, "ttest.csv"), row.names = TRUE)
# ---- Volcano plot of the t-test table (on the current device) ------------
# Adds two columns to `t`: `log10` (-log10 of the p-value) and `sig`
# (Upregulated / Downregulated / Not Significant).
t[, "log10"] <- -log10(t$p.value)
log_cutoff <- 2
pval_cutoff <- 0.05

# A row is significant only when it passes BOTH the p-value cutoff and the
# absolute log2 fold-change cutoff.
t[, "sig"] <- ifelse(
  t$log2FC > log_cutoff & t$p.value < pval_cutoff, "Upregulated",
  ifelse(
    t$log2FC < -log_cutoff & t$p.value < pval_cutoff,
    "Downregulated", "Not Significant"
  )
)
t

# library() stops with an error when ggplot2 is missing; require() would only
# return FALSE and let the script fail later at the first ggplot() call.
library(ggplot2)

# Colours are given as a NAMED vector so each category gets its own colour.
# Unnamed `values` plus `labels` are matched to the alphabetically sorted
# levels (Downregulated, Not Significant, Upregulated), which relabelled the
# legend incorrectly.
ggplot(t, aes(x = log2FC, y = log10, color = sig)) +
  geom_point() +
  theme_minimal() +
  scale_color_manual(values = c(
    "Upregulated" = "blue",
    "Downregulated" = "red",
    "Not Significant" = "grey"
  )) +
  geom_vline(xintercept = c(-log_cutoff, log_cutoff), linetype = "dashed") +
  geom_hline(yintercept = -log10(pval_cutoff), linetype = "dashed") +
  labs(x = "log2 Fold Change", y = "-log10(p value)", color = "Significance") +
  theme(legend.position = "right")
#' Build a volcano plot from a differential-analysis table.
#'
#' Each row of `t` is classified as "Upregulated" (log2FC above `log_cutoff`
#' and p-value below `pval_cutoff`), "Downregulated" (log2FC below
#' `-log_cutoff` and p-value below `pval_cutoff`) or "Not Significant", then
#' plotted as log2FC against -log10(p-value) with dashed cutoff guide lines.
#'
#' @param t Data frame with numeric columns `log2FC` and `p.value`.
#' @param log_cutoff Absolute log2 fold-change threshold (default 2).
#' @param pval_cutoff P-value threshold (default 0.05).
#' @return A ggplot object. It is only drawn when printed, so call
#'   `print()` on it when not at the interactive top level.
volcano <- function(t, log_cutoff = 2, pval_cutoff = 0.05) {
  t[, "log10"] <- -log10(t$p.value)

  passes_p <- t$p.value < pval_cutoff
  t[, "sig"] <- ifelse(
    t$log2FC > log_cutoff & passes_p, "Upregulated",
    ifelse(t$log2FC < -log_cutoff & passes_p, "Downregulated", "Not Significant")
  )

  # Named colours bind each category to its own colour. Unnamed `values`
  # plus `labels` are applied to the alphabetically sorted levels, which
  # swapped the legend entries.
  palette <- c(
    "Upregulated" = "blue",
    "Downregulated" = "red",
    "Not Significant" = "grey"
  )

  ggplot2::ggplot(t, ggplot2::aes(x = log2FC, y = log10, color = sig)) +
    ggplot2::geom_point() +
    ggplot2::theme_minimal() +
    ggplot2::scale_color_manual(values = palette) +
    ggplot2::geom_vline(
      xintercept = c(-log_cutoff, log_cutoff), linetype = "dashed"
    ) +
    ggplot2::geom_hline(yintercept = -log10(pval_cutoff), linetype = "dashed") +
    ggplot2::labs(
      x = "log2 Fold Change", y = "-log10(p value)", color = "Significance"
    ) +
    ggplot2::theme(legend.position = "right")
}
# ---- Save the volcano plot to <workdir>/volcano.svg ----------------------
svg(filename = file.path(workdir, "volcano.svg"))
# A ggplot object is only rendered when printed. Auto-printing happens at the
# interactive top level but not under source(), which would leave the SVG
# empty, so print explicitly.
print(volcano(t))
dev.off()
# ---- One-way ANOVA over all measurements pooled, grouped by class --------
x <- read.csv("./norm.csv", row.names = 1, check.names = FALSE)
class <- unique(x$class)
class

# Formula built from the class labels. NOTE(review): nothing in the visible
# script uses it; the model below is fitted with `response ~ group`.
aov_formula <- as.formula(
  paste("response ~", paste(paste0("`", class, "`"), collapse = " * "))
)
aov_formula

# From here on `class` is the per-row class label.
class <- x$class
class

# Drop the metadata columns so only the measurement columns remain.
x[, "class"] <- NULL
x[, "color"] <- NULL
x[, "sample_name"] <- NULL
features <- ncol(x)
colnames(x)

# Long format: every value of row 1, then every value of row 2, and so on,
# each paired with its row's class label. Vectorised replacement for growing
# `response` / `group` with append() inside a `1:nrow(x)` loop, which copied
# both vectors on every iteration and misbehaved on an empty table.
response <- as.vector(t(as.matrix(x)))
group <- rep(class, each = features)

data <- data.frame(
  response = response,
  group = as.factor(group)
)
data

aov_model <- aov(response ~ group, data)
aov_model
aov_result <- summary(aov_model)
aov_result

# Pairwise group comparisons from the fitted model.
tukey_result <- TukeyHSD(aov_model)
tukey_result
plot(tukey_result)

# One row per comparison with columns diff, lwr, upr and `p adj`.
tukey_df <- as.data.frame(tukey_result$group)
tukey_df
# ---- Plot the Tukey HSD mean differences with their intervals ------------
# The comparison labels are the row names of the Tukey table; copy them into
# a column so they can be mapped to the x axis.
tukey_df$Comparison <- factor(row.names(tukey_df))

# Notes on the aesthetics:
#  * the adjusted p-value column is literally named `p adj` (with a space),
#    so it must be back-quoted; an unquoted `p adj` is a syntax error and
#    `p.adj` / `group1` are not columns of this table;
#  * significance is mapped as an explicit label with NAMED colours. Mapping
#    the bare logical and giving positional values/labels assigned
#    "Significant" to FALSE, because the levels sort as FALSE, TRUE.
ggplot(
  tukey_df,
  aes(
    x = Comparison, y = diff,
    color = ifelse(`p adj` < 0.05, "Significant", "Not Significant")
  )
) +
  geom_point() +
  geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2) +
  theme_minimal() +
  labs(x = "Group Comparison", y = "Mean Difference", color = "Significant") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_manual(
    values = c("Significant" = "red", "Not Significant" = "black")
  )
# ---- One-way ANOVA + Tukey HSD for a single column of the table ----------
setwd("G:/MetaboLights/test/workflow_test")
x <- read.csv("./norm.csv", row.names = 1, check.names = FALSE)
head(x)
colnames(x)
x$class

# Column under test: the fifth column of the freshly loaded table. The
# metadata columns are still present in `x` at this point.
xcms_id <- colnames(x)[5]
xcms_id

# Take the grouping from the table just loaded instead of relying on a
# `class` variable left over from an earlier step.
data <- data.frame(
  response = as.numeric(x[, xcms_id]),
  group = as.factor(x$class)
)
data

aov_model <- aov(response ~ group, data)
aov_result <- summary(aov_model)
aov_result

tukey_result <- TukeyHSD(aov_model)
tukey_result
str(tukey_result)

# Keep only the pairwise comparison table as a data frame.
tukey_result <- as.data.frame(tukey_result$group)
tukey_result

# Drop a `tukey_df` left over from an earlier step, if there is one.
# (The stray `colnames(xc)` call on an undefined object, the bare `rm` and
# the second rm() of an already removed object have been removed.)
if (exists("tukey_df")) {
  rm(tukey_df)
}
invisible(gc())
# ---- One-way ANOVA + Tukey HSD for the first measurement column ----------
# Set the working directory BEFORE reading the relative path, and read once.
setwd("G:/MetaboLights/test/workflow_test")
x <- read.csv("./norm.csv", row.names = 1, check.names = FALSE)

# Grouping factor, taken before the metadata columns are dropped.
class <- as.factor(x$class)
x[, "class"] <- NULL
x[, "color"] <- NULL
x[, "sample_name"] <- NULL

# With the metadata removed, column 1 is the first remaining column.
xcms_id <- colnames(x)[1]
xcms_id

data <- data.frame(
  response = as.numeric(x[, xcms_id]),
  group = class
)
data

aov_model <- aov(response ~ group, data)
aov_model
aov_result <- summary(aov_model)
aov_result

# Pairwise comparisons as a data frame (diff, lwr, upr, `p adj`).
tukey_result <- TukeyHSD(aov_model)
tukey_result <- as.data.frame(tukey_result$group)
tukey_result
tukey_result$diff

# A summary.aov object cannot be passed to as.data.frame() directly; the
# ANOVA table is its first list element.
str(aov_result)
aov_result <- as.data.frame(aov_result[[1]])
# Row 1 of that table, returned as a list because of drop = TRUE.
aov_result <- aov_result[1, , drop = TRUE]
aov_result

# Recompute the raw TukeyHSD object for inspection in the data viewer.
# View() needs an interactive session, so it is skipped otherwise.
# NOTE(review): whether View() can display a TukeyHSD object depends on the
# front end in use - confirm.
tukey_result <- TukeyHSD(aov_model)
if (interactive()) {
  View(tukey_result)
}

# The session ends by discarding the Tukey result; kept as in the original.
tukey_result <- NULL
