1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
library(tidyverse)
library(stringr)
library(KEGGREST)
library(AnnotationForge)
#' Build an OrgDb package from an eggNOG-mapper annotation table
#'
#' Reads a tab-separated eggNOG-mapper annotation file, derives gene-name,
#' GO, KO and KEGG-pathway tables from it, and hands them to
#' `AnnotationForge::makeOrgPackage()`, which writes the package source into
#' the current working directory (`outputDir = "."`).
#'
#' The annotation file must provide the columns `query_name`,
#' `eggNOG annot`, `GO_terms` and `KEGG_KOs`. A file `kegg_info.RData`
#' containing a `ko2pathway` table with columns `Ko` and `Pathway` must be
#' present in the current working directory.
#'
#' @param f_emapper_anno eggnog-mapper annotation result
#' @param author Who is the creator of this package? like "xxx <xxx@xxx.xx>"
#' @param tax_id The Taxonomy ID that represents your organism. (NCBI has a nice online browser for finding the one you need)
#' @param genus Single string indicating the genus
#' @param species Single string indicating the species
#'
#' @return OrgDb name, built as `org.<first letter of genus><species>.eg.db`
#' @export
#'
#' @examples
#' \dontrun{
#' my_orgdb <- makeOrgPackageFromEmapper("input/sesame.modified.annotations",
#'                                       "xxx <xxx@xxx.xx>",
#'                                       tax_id = "4182",
#'                                       genus = "Sesamum",
#'                                       species = "indicum")
#' }
makeOrgPackageFromEmapper <- function(f_emapper_anno,
                                      author,
                                      tax_id = "0",
                                      genus = "default",
                                      species = "default") {
  kegg_file <- "kegg_info.RData"
  if (!file.exists(f_emapper_anno)) {
    stop("Annotation file not found: ", f_emapper_anno, call. = FALSE)
  }
  if (!file.exists(kegg_file)) {
    stop("'", kegg_file, "' not found in the working directory",
         call. = FALSE)
  }

  # read emapper result
  emapper <- read_delim(f_emapper_anno,
                        delim = "\t", escape_double = FALSE, trim_ws = TRUE)

  # makeOrgPackage() refuses tables that contain duplicated rows, so every
  # table below is de-duplicated before it is handed over.

  # extract gene name from emapper
  gene_info <- emapper %>%
    dplyr::select(GID = query_name, GENENAME = `eggNOG annot`) %>%
    na.omit() %>%
    dplyr::distinct()

  # extract go annotation from emapper: one row per (gene, GO term) pair.
  # Splitting the comma-separated column in one vectorised step also copes
  # with the case where no gene carries a GO annotation.
  gene2go <- emapper %>%
    dplyr::select(GID = query_name, GO = GO_terms) %>%
    na.omit() %>%
    tidyr::separate_rows(GO, sep = ",") %>%
    dplyr::mutate(EVIDENCE = "IEA") %>%
    dplyr::distinct() %>%
    as.data.frame(stringsAsFactors = FALSE)

  # extract kegg pathway annotation from emapper
  # NOTE(review): KEGG_KOs is used verbatim as the join key; if a cell can
  # hold several comma-separated KOs it would need splitting first - confirm
  # against the input format.
  gene2ko <- emapper %>%
    dplyr::select(GID = query_name, Ko = KEGG_KOs) %>%
    na.omit() %>%
    dplyr::distinct()

  # Load the KO -> pathway table into a private environment so the objects
  # stored in the RData file cannot shadow local variables.
  kegg_env <- new.env(parent = emptyenv())
  load(file = kegg_file, envir = kegg_env)
  if (!exists("ko2pathway", envir = kegg_env, inherits = FALSE)) {
    stop("'", kegg_file, "' does not contain an object named 'ko2pathway'",
         call. = FALSE)
  }
  ko2pathway <- get("ko2pathway", envir = kegg_env, inherits = FALSE)

  gene2pathway <- gene2ko %>%
    left_join(ko2pathway, by = "Ko") %>%
    dplyr::select(GID, Pathway) %>%
    na.omit() %>%
    dplyr::distinct()

  # make OrgDb
  makeOrgPackage(gene_info = gene_info,
                 go = gene2go,
                 ko = gene2ko,
                 pathway = gene2pathway,
                 version = "0.0.2",
                 maintainer = author,
                 author = author,
                 outputDir = ".",
                 tax_id = tax_id,
                 genus = genus,
                 species = species,
                 goTable = "go")

  # Package name: "org." + capitalised genus initial + species + ".eg.db"
  str_c("org.", str_to_upper(str_sub(genus, 1, 1)), species, ".eg.db")
}
# Build the OrgDb package source for Sesamum indicum (taxonomy ID 4182).
my_orgdb <- makeOrgPackageFromEmapper(
  "input/sesame.modified.annotations",
  "zhangxudong <zhangxudong@genek.tv>",
  tax_id = "4182",
  genus = "Sesamum",
  species = "indicum"
)

# Remove a previously installed copy first. system.file() returns "" when the
# package is not installed and, unlike requireNamespace(), does not load the
# old namespace into the session before it is replaced.
if (nzchar(system.file(package = my_orgdb))) {
  remove.packages(my_orgdb)
}

# The package was written as a source directory under the working directory.
install.packages(my_orgdb, repos = NULL, type = "source")

# Attach the freshly installed package by name and list its columns.
library(my_orgdb, character.only = TRUE)
columns(get(my_orgdb))
|