#!/usr/bin/env R library("clusterProfiler") # Make enrichment analysis library("limma") # A lots of math-related operations library("DOSE") # Disease Ontology library("enrichplot") # Awesome graphs library("pathview") # Nice pathway plot library("org.At.tair.db") # A. Thaliana annotation ################################## # PART 1 : Introduction # From DESeq2, we have # a large table with many columns ################################## deseq_genes <- read.table( file="tables/KOvsWT.complete.txt", sep="\t", header=TRUE ) print(names(deseq_genes)) print(head(deseq_genes$Id)) ##################################################### # PART 2: Gene Identifiers # For a computer, gene:AT1G01010 ≠ AT1G01010 # For a human, AT1G01010 is horrible to remember # We need to clean identifiers, # and add human-readable names ##################################################### # Fix identifiers for the computer head(deseq_genes$Id) deseq_genes$Id <- sub("gene:", "", deseq_genes$Id) head(deseq_genes$Id) # Translate for for humans annotation <- bitr( geneID = deseq_genes$Id, # Our gene list fromType = "TAIR", # We have TAIR ID toType = c("ENTREZID", "SYMBOL"),# Other ID list OrgDb = org.At.tair.db # Our annotation ) print(head(annotation)) # Add the translation to the result table deseq_genes <- merge( x = deseq_genes, y = annotation, by.x = "Id", by.y = "TAIR" ) print(head(deseq_genes)) ####################################################### # PART 3: Gene sets # We need to filter differentially expressed genes # in order to perform ORA. ####################################################### # Filter DEG de_genes <- deseq_genes[deseq_genes[, "padj"] <= 0.001, ] de_genes <- de_genes[!is.na(de_genes[, "log2FoldChange"]), ] dim(deseq_genes) dim(de_genes) # Perform ORA ego <- enrichGO( gene = de_genes$ENTREZID, # Ranked gene list universe= deseq_genes$ENTREZID,#All genes OrgDb = org.At.tair.db, # Annotation keyType = "ENTREZID", # The genes ID ont = "CC", # Cellular Components pvalueCutoff = 1, # Significance Threshold pAdjustMethod = "BH", # Adjustment method readable = TRUE # For human beings ) # Plot ORA results barplot(ego, showCategory=15) dotplot(object = ego, showCategory=15) # Search for enriched terms that relates to roots res_ego <- ego@result print(head(res_ego, 3)) roots <- res_ego[with(res_ego, grepl("root", Description)), ] print(head(roots)) # Correct the previous ORA to include biological processes # instead of cellular components ego <- enrichGO( gene = de_genes$ENTREZID, # Ranked gene list universe= deseq_genes$ENTREZID,#All genes OrgDb = org.At.tair.db, # Annotation keyType = "ENTREZID", # The genes ID ont = "BP", # Biological Process pvalueCutoff = 1, # Significance Threshold pAdjustMethod = "BH", # Adjustment method readable = TRUE # For human beings ) # Plot corrected results barplot(ego, showCategory=15) dotplot(object = ego, showCategory=15) # Search for roots res_ego <- ego@result roots <- res_ego[with(res_ego, grepl("root", Description)), ] print(head(roots)) ################################################################## # PART 4: GSEA # We need to build a named vector which contains sorted numbers # Then we can run a Gene set enrichment analysis ################################################################### # Explore results to guess the right column to extract print(colnames(deseq_genes)) # We choose to use the 'stat' column geneList <- as.numeric(de_genes$stat) names(geneList) <- de_genes$ENTREZID geneList <- sort(geneList, decreasing=TRUE) # Run GSEA on Gene Onthology gsea <- gseGO( geneList = geneList, # Ranked gene list ont = "BP", # Biological Process OrgDb = org.At.tair.db, # Annotation keyType = "ENTREZID", # Identifiers pAdjustMethod = "BH", # Pvalue Adjustment pvalueCutoff = 1 # Significance Threshold ) # Explore results columns_of_interest <- c( "Description", "enrichmentScore", "p.adjust" ) head( x = gsea[, columns_of_interest], # Pathway ID 8 # lines to display ) # Search plant organ morphogenesis gsea_line <- match( "plant organ morphogenesis", gsea$Description ) gseaplot2( x = gsea, # Our analysis geneSetID = gsea$ID[gsea_line], # Pathway ID title = "plant organ morphogenesis" # Its name ) # Look at the roots pathways gsea_line <- match( "root morphogenesis", gsea$Description ) gseaplot2( x = gsea, # Our analysis geneSetID = gsea$ID[gsea_line], # Pathway ID title = "root morphogenesis" # Its name ) # Plot multiple pathways on the same graph gseaplot2( x = gsea, geneSetID = 1:3, title = "Most enriched terms" ) ############################################## # PART 5: Compare sets # We want to compare pathways to each others ############################################## # Use a Heatmap heatplot( x = ego, # Our enrichment showCategory = 15, # Nb of terms to display foldChange = geneList[1:10] # Our fold changes ) # Use an upsetplot upsetplot(x = ego) # From our enrichment analysis # Use an enrichment map emapplot(ego) # From our enrichment analysis # Relate enriched terms with each others: goplot(ego) # From our enrichment analysis # Plot complete pathways #names(geneList) <- de_genes$TAIR # Use TAIR id pv.out <- pathview( gene.data = geneList, # Our gene list pathway.id = "ath00630", # Our pathway species = "ath", # Our organism # The color limits limit = list(gene=max(abs(geneList))), gene.idtype = "ENTREZID" # The genes identifiers )