GSEA dot plot

[48]:
library(ggplot2)
library(dplyr)
library(stringr)
library(forcats) ## for reordering the factor

[49]:
# Load the tab-separated GSEA results table, then preview its first rows.
df <- read.csv(
  file = "gsea.tsv",
  sep = "\t"
)
head(df, n = 6L)
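A data.frame: 6 × 7

|   | Term                                                   | es         | nes       | pval  | fdr   | geneset_size | matched_size |
|---|--------------------------------------------------------|------------|-----------|-------|-------|--------------|--------------|
|   | <chr>                                                  | <dbl>      | <dbl>     | <int> | <int> | <int>        | <int>        |
| 1 | HALLMARK_UV_RESPONSE_DN                                | -0.4888729 | -2.175577 | 0     | 0     | 144          | 40           |
| 2 | HALLMARK_MITOTIC_SPINDLE                               | -0.3717000 | -2.100263 | 0     | 0     | 199          | 87           |
| 3 | translational termination                              | 0.7299984  | 3.668103  | 0     | 0     | 96           | 47           |
| 4 | translational elongation                               | 0.6999150  | 3.711168  | 0     | 0     | 104          | 55           |
| 5 | mitochondrial respiratory chain complex I assembly     | 0.7694024  | 3.758074  | 0     | 0     | 58           | 37           |
| 6 | mitochondrial ATP synthesis coupled electron transport | 0.8161274  | 4.028207  | 0     | 0     | 71           | 50           |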
[50]:
# Add a gene_ratio column: matched_size divided by geneset_size, row by row.
df[["gene_ratio"]] <- with(df, matched_size / geneset_size)
head(df, n = 6L)
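A data.frame: 6 × 8

|   | Term                                                   | es         | nes       | pval  | fdr   | geneset_size | matched_size | gene_ratio |
|---|--------------------------------------------------------|------------|-----------|-------|-------|--------------|--------------|------------|
|   | <chr>                                                  | <dbl>      | <dbl>     | <int> | <int> | <int>        | <int>        | <dbl>      |
| 1 | HALLMARK_UV_RESPONSE_DN                                | -0.4888729 | -2.175577 | 0     | 0     | 144          | 40           | 0.2777778  |
| 2 | HALLMARK_MITOTIC_SPINDLE                               | -0.3717000 | -2.100263 | 0     | 0     | 199          | 87           | 0.4371859  |
| 3 | translational termination                              | 0.7299984  | 3.668103  | 0     | 0     | 96           | 47           | 0.4895833  |
| 4 | translational elongation                               | 0.6999150  | 3.711168  | 0     | 0     | 104          | 55           | 0.5288462  |
| 5 | mitochondrial respiratory chain complex I assembly     | 0.7694024  | 3.758074  | 0     | 0     | 58           | 37           | 0.6379310  |
| 6 | mitochondrial ATP synthesis coupled electron transport | 0.8161274  | 4.028207  | 0     | 0     | 71           | 50           | 0.7042254  |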
[51]:
# All p-values are zero, so mapping p-value to color or size would be uninformative.
[52]:
# Size of the inline notebook figure; same dimensions as the saved PDF below.
options(repr.plot.width = 10, repr.plot.height = 8)

# Dot plot with one point per term: x position is gene_ratio, terms are
# ordered along the y axis by gene_ratio, point size shows matched_size and
# point color shows nes.
gsea_plot <- ggplot(
  df,
  aes(x = gene_ratio, y = fct_reorder(Term, gene_ratio))
) +
  geom_point(aes(size = matched_size, color = nes)) +
  theme_bw(base_size = 16) +
  # nes takes both negative and positive values, so the diverging scale is
  # centred at 0: negative scores shade toward blue, positive toward red.
  scale_color_gradient2(low = "blue", high = "red", midpoint = 0) +
  ylab(NULL) +
  ggtitle("Gene Set Enrichment Analysis")

# Draw the figure explicitly so it is rendered even when this code is run
# via source(), where top-level values are not auto-printed.
print(gsea_plot)

# Pass the plot object explicitly instead of relying on last_plot().
ggsave("GSEA.pdf", plot = gsea_plot, height = 8, width = 10)
../_images/jupyter_notebooks_GSEA_dotplot_5_0.png