# 差异表达分析：使用 R 进行 RNA-Seq 数据分析

在现代生物学研究中，RNA-Seq（RNA 测序）已经成为解析基因表达的强大工具。差异表达分析是其中一个重要步骤，通过比较不同条件下的基因表达水平，我们可以找出哪些基因在不同的生物条件下表现出显著变化。

Summary_of_RNA-Seq

# 差异表达分析

差异表达分析（Differential Expression Analysis）是指通过比较不同条件或处理下的基因表达水平，找出那些显著变化的基因。这些基因的变化可能与某些生物学过程、疾病状态或环境响应有关。

# 使用的软件包

limma ：用于线性模型拟合和差异表达分析。
edgeR ：用于 RNA-Seq 数据的标准化和处理。
dplyr ：用于数据操作。

# 函数实现

在此我们将构建一个名为 runLimma 的函数，该函数的输入参数为：

表达矩阵（ exp ）
样本分组信息（ group ）
是否执行标准化（ normalize ）
是否生成有方向的对比（ directional ）

其能够：

处理 RNA-Seq 数据的归一化
生成详细的结果总结
自动生成所有可能的对比矩阵，考虑所有组别之间的对比

让我们一步步讲解这个函数是如何工作的。

# 数据准备和输入

首先，我们需要准备两个输入：

exp ：表达矩阵，行代表基因，列代表样本。
group ：分组信息，每个样本对应一个实验条件或组别。

	runLimma <- function(exp, group, normalize = TRUE, directional = FALSE) {
	library(limma)
	library(edgeR)
	library(dplyr)

# 分组信息和设计矩阵

我们将分组信息转换为因子向量，并创建设计矩阵。设计矩阵用于描述实验的设计和条件，告诉我们哪些样本属于哪一组。

	# 分组信息变为因子向量
	conditions <- factor(group)

	# 设计矩阵
	design <- model.matrix(~0 + conditions)
	colnames(design) <- levels(conditions)

# RNA-Seq 数据归一化

RNA-Seq 数据通常需要标准化，以便减少技术变异的影响，使不同样本之间的基因表达水平可以更准确地比较。但是如果数据已经经过了标准化，我们可以跳过这一步：

	# RNA-Seq 数据归一化
	if (normalize) {
	dge <- DGEList(counts = as.matrix(exp), group = conditions)
	dge <- calcNormFactors(dge)
	v <- voom(dge, design, plot = TRUE)
	exprs <- v$E
	} else {
	exprs <- as.matrix(exp)
	}

这里我们使用 edgeR 包中的 DGEList 和 calcNormFactors 函数对数据进行标准化处理。 voom 函数进一步处理数据，使其适用于线性模型分析。

# 线性模型和经验贝叶斯分析

接下来，我们使用 lmFit 和 eBayes 函数进行线性模型拟合和贝叶斯统计。这一步帮助我们找到在不同组别间显著变化的基因。

	# 线性模型和经验贝叶斯
	fit <- lmFit(exprs, design)
	fit <- eBayes(fit)

# 生成对比矩阵

我们自动生成所有可能的对比矩阵，考虑所有组别之间的对比。

对比矩阵帮助我们定义哪些组别之间进行比较。

如果 directional 设置为 TRUE ，函数会生成双向对比（例如 N-T 和 T-N ），否则只生成一个方向的对比（可能是 N-T 或 T-N ）。

	# 自动生成所有可能的对比矩阵
	unique_conditions <- levels(conditions)
	num_conditions <- length(unique_conditions)
	contrast_str <- c()
	for (i in 1:(num_conditions - 1)) {
	for (j in (i + 1):num_conditions) {
	contrast_name <- paste0(unique_conditions[i], "-", unique_conditions[j])
	contrast_str <- c(contrast_str, contrast_name)
	if (directional) {
	contrast_name_reverse <- paste0(unique_conditions[j], "-", unique_conditions[i])
	contrast_str <- c(contrast_str, contrast_name_reverse)
	}
	}
	}
	contrasts <- makeContrasts(contrasts = contrast_str, levels = colnames(design))

# 应用对比矩阵并生成详细结果

使用生成的对比矩阵，我们可以对数据进行差异表达分析，并生成详细的结果总结，包括每个对比下的显著变化基因及其调控方向（上调或下调）。

	# 应用对比矩阵
	fit2 <- contrasts.fit(fit, contrasts)
	fit2 <- eBayes(fit2)

	# 生成详细的结果总结
	summary_results <- list()
	for (contrast_name in contrast_str) {
	tT <- topTable(fit2, coef = contrast_name, adjust = "fdr", sort.by = "B", number = nrow(exprs))
	regulated <- ifelse(tT$logFC > 0, "Up", "Down")
	tT$Regulation <- regulated
	lfcs <- c(log2(1.2), log2(1.3), log2(1.5), 1)
	all.deg.cnt <- cbind()
	for (lfc in lfcs) {
	deg1 <- regulated[which(abs(tT$logFC) > lfc & tT$P.Value < 0.05)]
	deg2 <- regulated[which(abs(tT$logFC) > lfc & tT$P.Value < 0.01)]
	deg3 <- regulated[which(abs(tT$logFC) > lfc & tT$adj.P.Val < 0.1)]
	deg4 <- regulated[which(abs(tT$logFC) > lfc & tT$adj.P.Val < 0.05)]
	deg5 <- regulated[which(abs(tT$logFC) > lfc & tT$adj.P.Val < 0.01)]
	all.deg.cnt <- cbind(all.deg.cnt, c(
	paste0(sum(deg1 == "Up"), "\|", sum(deg1 == "Down")),
	paste0(sum(deg2 == "Up"), "\|", sum(deg2 == "Down")),
	paste0(sum(deg3 == "Up"), "\|", sum(deg3 == "Down")),
	paste0(sum(deg4 == "Up"), "\|", sum(deg4 == "Down")),
	paste0(sum(deg5 == "Up"), "\|", sum(deg5 == "Down"))
	))
	}
	row.names(all.deg.cnt) <- c("p<0.05", "p<0.01", "FDR<0.1", "FDR<0.05", "FDR<0.01")
	colnames(all.deg.cnt) <- paste0(c("1.2", "1.3", "1.5", "2"), "-fold")
	summary_results[[contrast_name]] <- list(DEG = tT, Summary = all.deg.cnt)
	}

	results <- list(
	CombinedFit = fit2,
	DetailedResults = summary_results
	)
	return(results)
	}

# 提取结果

getSummary 函数用于打印某个对比的总结信息。

	getSummary <- function(results, contrast_name) {
	if (!contrast_name %in% names(results$DetailedResults)) {
	stop("The specified contrast is not found in the results.")
	}

	summary <- results$DetailedResults[[contrast_name]]$Summary
	print(summary)
	}

getDEGs 函数用于根据指定的 p-value 阈值（默认 0.05）、指定的 fdr 阈值（默认 0.05）和 logFC 阈值（默认 1）获取某个对比中的差异表达基因。

	getDEGs <- function(results, contrast_name, pvalue_threshold = 0.05, fdr_threshold = 0.05, logFC_threshold = 1) {
	if (!contrast_name %in% names(results$DetailedResults)) {
	stop("The specified contrast is not found in the results.")
	}

	tT <- results$DetailedResults[[contrast_name]]$DEG
	degs <- tT[which(tT$P.Value < pvalue_threshold & tT$adj.P.Val < fdr_threshold & abs(tT$logFC) > logFC_threshold), ]
	return(degs)
	}

为了获取所有基因来绘制火山图，我们可以对此函数稍作修改：

	getDEGs <- function(results, contrast_name, pvalue_threshold = 0.05, fdr_threshold = 0.05, logFC_threshold = 1) {
	if (!contrast_name %in% names(results$DetailedResults)) {
	stop("The specified contrast is not found in the results.")
	}

	tT <- results$DetailedResults[[contrast_name]]$DEG
	if (is.null(pvalue_threshold) && is.null(fdr_threshold) && is.null(logFC_threshold)) {
	degs <- tT
	} else {
	degs <- tT[tT$P.Value < pvalue_threshold & tT$adj.P.Val < fdr_threshold & abs(tT$logFC) > logFC_threshold, ]
	}

	return(degs)
	}

当 pvalue_threshold == fdr_threshold == logFC_threshold == NULL 时，获取所有的基因

allGenes <- getDEGs(results, "T-N", pvalue_threshold = NULL, fdr_threshold = NULL, logFC_threshold = NULL)

# 绘制火山图

limma 包的分析结果中，虽然最后一列 Regulation 已经标注了基因的上下调信息，但它仅仅是根据 logFC 列的正负来标注的，所以我们新建一列重新标注：

allGenes <- getDEGs(results, "T-N", pvalue_threshold = NULL, fdr_threshold = NULL, logFC_threshold = NULL)

significance <- ifelse((allGenes$`adj.P.Val` < 0.05 & abs(allGenes$logFC) > 1), ifelse(allGenes$logFC > 1, "Up", "Down"), "NS")

allGenes$Significance <- significance

使用 ggplot2 绘图：

	ggplot(allGenes, aes(logFC, -log10(adj.P.Val))) +
	geom_point(aes(col = Significance)) +
	scale_color_manual(values=c("#0072B5", "grey", "#BC3C28")) +
	labs(title = " ") +
	geom_vline(xintercept = c(-1, 1), colour = "black", linetype = "dashed") +
	geom_hline(yintercept = -log10(0.05), colour = "black", linetype = "dashed") +
	theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold")) +
	labs(x = "log2(FoldChange)", y = "-log10(Pvalue)") +
	theme(axis.text = element_text(size = 13), axis.title = element_text(size = 13)) +
	theme_bw()
	ggsave("DEGs.png", width = 5, height = 4, dpi = 300)

也可以写成函数：

	volcanoPlot <- function(data, logFC_col, pvalue_col, title = "Volcano Plot", significance_col = "Significance", colors = c("#0072B5", "grey", "#BC3C28")) {
	# 检查颜色向量长度
	if (length(colors) != 3) {
	stop("Please provide exactly three colors.")
	}

	# 将列名字符串转换为 symbols
	log2FC_sym <- sym(logFC_col)
	pvalue_sym <- sym(pvalue_col)
	significance_sym <- sym(significance_col)

	mytheme <- theme_bw() +
	theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
	axis.text = element_text(size = 13),
	axis.title = element_text(size = 13))

	# 绘图
	ggplot(data, aes(x = !!log2FC_sym, y = -log10(!!pvalue_sym), color = !!significance_sym)) +
	geom_point() +
	scale_color_manual(values = colors) +
	labs(title = title, x = "log2(FoldChange)", y = "-log10(Pvalue)") +
	geom_vline(xintercept = c(-1, 1), colour = "black", linetype = "dashed") +
	geom_hline(yintercept = -log10(0.05), colour = "black", linetype = "dashed") +
	mytheme
	}

	volcanoPlot(allGenes, logFC_col = "logFC", pvalue_col = "adj.P.Val")

关于 sym() 和 !! 的用法

在 R 语言中， sym() 和 !! 这两个概念属于非标准评估（non-standard evaluation, NSE）的范畴，这是一个非常强大但稍显复杂的主题，特别是在处理动态变量名时非常有用。

sym() 函数

sym() 函数是 rlang 包提供的一个工具，用于将字符串转换为符号（symbol）。在 R 中，符号是一种对象类型，用于在表达式中代表变量名。

用途：当你需要将字符串动态转换为可以由函数理解的变量名时， sym() 就非常有用。例如，如果你有一个变量的名称存储为字符串，而你需要用这个变量名在 ggplot2 或 dplyr 的函数中指定数据列，这时就需要用到 sym() 。

!! 运算符（读作 “bang-bang”）

!! 是 rlang 包中的一个运算符，用于在函数内部 强制展开 符号，使其在表达式中被当作实际的变量名处理。

用途： !! 使得你可以在编写通用函数或包含变量输入的函数时，将符号作为实际的变量名在表达式中使用。这在使用如 ggplot2 、 dplyr 等包进行编程时尤其有用，因为这些包经常使用非标准评估。

示例

假设你有一个数据框 df ，并且你想根据用户的输入来选择哪一列进行操作。用户的输入是一个字符串，如 "height" 。你不能直接在如 ggplot 或 dplyr 的函数中使用 "height" 字符串，因为这些函数期望的是列名，而不是包含列名的字符串。这时，你可以使用 sym() 和 !! ：

	library(rlang)
	library(ggplot2)
	library(dplyr)

	# 假设的数据框和用户输入
	df <- data.frame(height = rnorm(100, 175, 10), weight = rnorm(100, 70, 5))
	column_name <- "height"

	# 使用 sym () 和！！来动态选择列
	height_symbol <- sym(column_name) # 将字符串转换为符号
	df %>% summarise(Average = mean(!!height_symbol)) # 使用！！来展开符号

在这个例子中：

sym(column_name) 将字符串 "height" 转换为符号 height 。
!!height_symbol 在 summarise() 函数中将符号 height 展开为实际的列名。

这样，你就可以根据字符串变量动态地引用数据框中的列，而无需硬编码列名。这对于编写灵活且可重用的数据处理和可视化代码非常重要。

如果我们想为一些基因添加注释，可以使用 geom_label() 或 geom_text() 添加注释图层，这两个函数的区别在于， geom_text() 将文本直接叠加到图像中，而 geom_label() 添加的文本有一个矩形框背景，可以更好地提升文本的可读性。

但是在实际的应用中，由于标签重叠等问题，更常用的是 ggrepel 包中的 geom_label_repel() 或 geom_text_repel() 函数：

	install.packages("ggrepel")
	library(ggrepel)

修改后的 volcanoPlot() 函数：

	volcanoPlot <- function(data, logFC_col, pvalue_col, title = "Volcano Plot", significance_col = "Significance", colors = c("#0072B5", "grey", "#BC3C28"), labels = NULL) {
	# 检查颜色向量长度
	if (length(colors) != 3) {
	stop("Please provide exactly three colors.")
	}

	# 将列名字符串转换为 symbols
	log2FC_sym <- sym(logFC_col)
	pvalue_sym <- sym(pvalue_col)
	significance_sym <- sym(significance_col)

	# 如果提供了 labels，添加一个新列用于标注
	if (!is.null(labels)) {
	data$Label <- ifelse(rownames(data) %in% labels, 1, 0)
	} else {
	data$Label <- 0
	}

	mytheme <- theme_bw() +
	theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
	axis.text = element_text(size = 13),
	axis.title = element_text(size = 13))

	# 绘图
	p <- ggplot(data, aes(x = !!log2FC_sym, y = -log10(!!pvalue_sym), color = !!significance_sym)) +
	geom_point(alpha = 0.5) +
	guides(color = guide_legend(override.aes = list(size = 5))) +
	scale_color_manual(values = colors) +
	labs(title = title, x = "log2(FoldChange)", y = "-log10(Pvalue)") +
	geom_vline(xintercept = c(-1, 1), colour = "black", linetype = "dashed") +
	geom_hline(yintercept = -log10(0.05), colour = "black", linetype = "dashed") +
	mytheme

	# 如果有标签，添加标签
	if (!is.null(labels)) {
	p <- p + geom_text_repel(aes(label = ifelse(Label == 1, rownames(data), "")),
	arrow = arrow(ends="first", length = unit(0.01, "npc")), show.legend = F)
	}

	return(p)
	}

# 完整代码

下面是完整的函数代码：

	library(readxl)
	library(dplyr)
	library(limma)
	library(edgeR)
	library(ggplot2)
	library(ggrepel)
	library(rlang)

	# 执行差异表达分析的函数
	runLimma <- function(exp, group, normalize = TRUE, directional = FALSE) {
	# 分组信息变为因子向量
	conditions <- factor(group)

	# 设计矩阵
	design <- model.matrix(~0 + conditions)
	colnames(design) <- levels(conditions)

	if (normalize) {
	# RNA-Seq 数据归一化
	dge <- DGEList(counts = as.matrix(exp), group = conditions)
	dge <- calcNormFactors(dge)
	v <- voom(dge, design, plot = TRUE)
	exprs <- v$E
	} else {
	exprs <- as.matrix(exp)
	}

	# 线性模型和经验贝叶斯
	fit <- lmFit(exprs, design)
	fit <- eBayes(fit)

	# 自动生成所有可能的对比矩阵
	unique_conditions <- levels(conditions)
	num_conditions <- length(unique_conditions)
	contrast_str <- c()
	for (i in 1:(num_conditions - 1)) {
	for (j in (i + 1):num_conditions) {
	contrast_name <- paste0(unique_conditions[i], "-", unique_conditions[j])
	contrast_str <- c(contrast_str, contrast_name)
	if (directional) {
	contrast_name_reverse <- paste0(unique_conditions[j], "-", unique_conditions[i])
	contrast_str <- c(contrast_str, contrast_name_reverse)
	}
	}
	}
	contrasts <- makeContrasts(contrasts = contrast_str, levels = colnames(design))

	fit2 <- contrasts.fit(fit, contrasts)
	fit2 <- eBayes(fit2)

	# 生成详细的结果总结
	summary_results <- list()
	for (contrast_name in contrast_str) {
	tT <- topTable(fit2, coef = contrast_name, adjust = "fdr", sort.by = "B", number = nrow(exprs))
	regulated <- ifelse(tT$logFC > 0, "Up", "Down")
	tT$Regulation <- regulated
	lfcs <- c(log2(1.2), log2(1.3), log2(1.5), 1)
	all.deg.cnt <- cbind()
	for (lfc in lfcs) {
	deg1 <- regulated[which(abs(tT$logFC) > lfc & tT$P.Value < 0.05)]
	deg2 <- regulated[which(abs(tT$logFC) > lfc & tT$P.Value < 0.01)]
	deg3 <- regulated[which(abs(tT$logFC) > lfc & tT$adj.P.Val < 0.1)]
	deg4 <- regulated[which(abs(tT$logFC) > lfc & tT$adj.P.Val < 0.05)]
	deg5 <- regulated[which(abs(tT$logFC) > lfc & tT$adj.P.Val < 0.01)]
	all.deg.cnt <- cbind(all.deg.cnt, c(
	paste0(sum(deg1 == "Up"), "\|", sum(deg1 == "Down")),
	paste0(sum(deg2 == "Up"), "\|", sum(deg2 == "Down")),
	paste0(sum(deg3 == "Up"), "\|", sum(deg3 == "Down")),
	paste0(sum(deg4 == "Up"), "\|", sum(deg4 == "Down")),
	paste0(sum(deg5 == "Up"), "\|", sum(deg5 == "Down"))
	))
	}
	row.names(all.deg.cnt) <- c("p<0.05", "p<0.01", "FDR<0.1", "FDR<0.05", "FDR<0.01")
	colnames(all.deg.cnt) <- paste0(c("1.2", "1.3", "1.5", "2"), "-fold")
	summary_results[[contrast_name]] <- list(DEG = tT, Summary = all.deg.cnt)
	}

	results <- list(
	CombinedFit = fit2,
	DetailedResults = summary_results
	)
	return(results)
	}

	# 获得总结的函数
	getSummary <- function(results, contrast_name) {
	if (!contrast_name %in% names(results$DetailedResults)) {
	stop("The specified contrast is not found in the results.")
	}

	summary <- results$DetailedResults[[contrast_name]]$Summary
	print(summary)
	}

	# 获得差异基因的函数
	getDEGs <- function(results, contrast_name, pvalue_threshold = 0.05, fdr_threshold = 0.05, logFC_threshold = 1) {
	if (!contrast_name %in% names(results$DetailedResults)) {
	stop("The specified contrast is not found in the results.")
	}

	tT <- results$DetailedResults[[contrast_name]]$DEG
	if (is.null(pvalue_threshold) && is.null(fdr_threshold) && is.null(logFC_threshold)) {
	degs <- tT
	} else {
	degs <- tT[tT$P.Value < pvalue_threshold & tT$adj.P.Val < fdr_threshold & abs(tT$logFC) > logFC_threshold, ]
	}

	return(degs)
	}

	# 绘制火山图的函数
	volcanoPlot <- function(data, logFC_col, pvalue_col, title = "Volcano Plot", significance_col = "Significance", colors = c("#0072B5", "grey", "#BC3C28"), labels = NULL) {
	# 检查颜色向量长度
	if (length(colors) != 3) {
	stop("Please provide exactly three colors.")
	}

	# 将列名字符串转换为 symbols
	log2FC_sym <- sym(logFC_col)
	pvalue_sym <- sym(pvalue_col)
	significance_sym <- sym(significance_col)

	# 如果提供了 labels，添加一个新列用于标注
	if (!is.null(labels)) {
	data$Label <- ifelse(rownames(data) %in% labels, 1, 0)
	} else {
	data$Label <- 0
	}

	mytheme <- theme_bw() +
	theme(plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
	axis.text = element_text(size = 13),
	axis.title = element_text(size = 13))

	# 绘图
	p <- ggplot(data, aes(x = !!log2FC_sym, y = -log10(!!pvalue_sym), color = !!significance_sym)) +
	geom_point(alpha = 0.5) +
	guides(color = guide_legend(override.aes = list(size = 5))) +
	scale_color_manual(values = colors) +
	labs(title = title, x = "log2(FoldChange)", y = "-log10(Pvalue)") +
	geom_vline(xintercept = c(-1, 1), colour = "black", linetype = "dashed") +
	geom_hline(yintercept = -log10(0.05), colour = "black", linetype = "dashed") +
	mytheme

	# 如果有标签，添加标签
	if (!is.null(labels)) {
	p <- p + geom_text_repel(aes(label = ifelse(Label == 1, rownames(data), "")),
	arrow = arrow(ends="first", length = unit(0.01, "npc")), show.legend = F)
	}

	return(p)
	}

# 结果解读

通常经过差异表达分析我们会获得如下结果：

	> head(degs)
	logFC AveExpr t P.Value adj.P.Val B Regulation
	MESDC2 -5.406474 5.2581480 -9.940661 3.711317e-19 7.509108e-15 32.68792 Down
	CLVS1 -3.035352 8.3649303 -9.702325 1.823051e-18 1.844289e-14 31.15791 Down
	MTBP -2.883240 9.0324125 -9.492411 7.326974e-18 4.941556e-14 29.82074 Down
	GUCA2A 2.694356 -0.1384359 9.348538 1.889189e-17 9.555991e-14 28.91025 Up
	GUCA2B 2.743741 -0.2315600 9.287302 2.822673e-17 1.142223e-13 28.52427 Up
	METTL5 -2.568758 9.4407518 -9.206254 4.795137e-17 1.617000e-13 28.01489 Down

我们来逐列解释这些结果：

logFC（对数折叠变化，Log Fold Change）：
- logFC 表示目标基因在不同条件（如实验组和对照组）之间表达水平的对数变化值，通常以 2 为底数。其负值表示在实验组中下调，正值表示上调。
- 例如，如果我们做了一个肿瘤组织（T）与癌旁组织（N）的差异表达分析（T-N）：如果 $logFC>0$ ，即 $log_2FC>0$ ，即 $FC>1$ ，也就是说 $\frac{T 的表达水平}{N 的表达水平}>1$ ，所以 T 组上调。
AveExpr（平均表达值，Average Expression）：
- 这是基因在所有样本中的平均表达水平。
- 例如，MESDC2 的 AveExpr 为 5.2581480，表示其在所有样本中的平均表达值为 5.2581480。
t（t 统计量，T-statistic）：
- 这是用于检验基因表达差异显著性的 t 检验统计量。
- 例如，MESDC2 的 t 值 为 -9.940661，表示其在对照组和实验组之间的表达差异非常显著。
P.Value（P 值，P-Value）：
- 这是 t 检验的原始 P 值，表示基因表达差异的显著性水平。较小的 P 值表示差异更显著。
- 例如，MESDC2 的 P 值 为 3.711317e-19，表示其表达差异极其显著。
adj.P.Val（调整后的 P 值，Adjusted P-Value）：
- 这是经过多重检验校正后的 P 值（例如，使用 Benjamini-Hochberg 方法）。调整后的 P 值用于控制假阳性率。
- 例如，MESDC2 的 调整后的 P 值 为 7.509108e-15，仍然表示其表达差异非常显著。
B（B 统计量，B-Statistic）：
- 这是基于贝叶斯统计方法计算的 B 值，用于估计基因为差异表达基因的概率。较高的 B 值表示更高的可信度。
- 例如，MESDC2 的 B 值 为 32.68792，表示其差异表达的可信度非常高。
Regulation（调控方向，Regulation）：
- 这是根据 logFC 的符号确定的调控方向，“Up” 表示上调，“Down” 表示下调。
- 例如，MESDC2 的调控方向为 “Down”，表示其在实验组中表达下调。

通过上述结果，我们可以看到几个基因在对照组和实验组之间的表达差异显著。例如，MESDC2 的表达在实验组中显著下调，而 GUCA2A 的表达在实验组中显著上调。调整后的 P 值和 B 统计量进一步验证了这些差异的显著性和可信度。

R 生信 RNA-Seq