Dataset


The dataset used in this report is the Digimon Database dataset, which contains information on the Digimon from “Digimon Story: Cyber Sleuth”, released for PlayStation Vita in 2015 and PlayStation 4 in 2016.

library(tidyverse)

Loading


# Read the Digimon list from the project's data directory. file.path() joins
# the directory and file name with the path separator instead of pasting a
# "/" by hand. Strings are kept as character vectors rather than factors.
digimon_list <- read.csv(
  file.path(wd$data, "DigiDB_digimonlist.csv"),
  stringsAsFactors = FALSE
)
head(digimon_list)
##   Number Digimon       Stage Type Attribute Memory Equip.Slots Lv.50.HP
## 1      1 Kuramon        Baby Free   Neutral      2           0      590
## 2      2 Pabumon        Baby Free   Neutral      2           0      950
## 3      3 Punimon        Baby Free   Neutral      2           0      870
## 4      4 Botamon        Baby Free   Neutral      2           0      690
## 5      5 Poyomon        Baby Free   Neutral      2           0      540
## 6      6 Koromon In-Training Free      Fire      3           0      940
##   Lv50.SP Lv50.Atk Lv50.Def Lv50.Int Lv50.Spd
## 1      77       79       69       68       95
## 2      62       76       76       69       68
## 3      50       97       87       50       75
## 4      68       77       95       76       61
## 5      98       54       59       95       86
## 6      52      109       93       52       76

Data Exploration


How many Digimon are there in each Stage?

# Count the Digimon in each Stage and list the most populous stages first.
digimon_list %>%
  group_by(Stage) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## # A tibble: 8 x 2
##   Stage       count
##   <chr>       <int>
## 1 Mega           74
## 2 Ultimate       58
## 3 Champion       54
## 4 Rookie         38
## 5 In-Training    11
## 6 Ultra           6
## 7 Baby            5
## 8 Armor           3

And by Stage and Attribute?

# Count the Digimon in every Stage/Attribute combination, largest groups
# first. The result is left grouped by Stage, as summarise() only peels off
# the last grouping variable.
digimon_list %>%
  group_by(Stage, Attribute) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## # A tibble: 51 x 3
## # Groups:   Stage [8]
##    Stage    Attribute count
##    <chr>    <chr>     <int>
##  1 Mega     Dark         13
##  2 Mega     Light        11
##  3 Champion Fire         10
##  4 Mega     Electric     10
##  5 Ultimate Dark         10
##  6 Champion Earth         8
##  7 Mega     Wind          8
##  8 Ultimate Fire          8
##  9 Champion Plant         7
## 10 Champion Water         7
## # ... with 41 more rows

Identifying the Strongest Digimon in each Stage


Having looked at the frequency of stages and attributes in the data, I was interested in identifying the most powerful Digimon in each Stage for each of the six Lv. 50 power stats (HP, SP, Atk, Def, Int and Spd).

#' Select the strongest Digimon of one stage by a given power column
#'
#' @param digimon Data frame of Digimon with a `Stage` column and at least
#'   one numeric power column.
#' @param stage Stage to keep; compared against `digimon$Stage` with `==`.
#' @param power_col Index or name of the numeric column to rank by.
#' @param n Maximum number of rows to return (default 3).
#' @return The rows of `digimon` belonging to `stage`, sorted by `power_col`
#'   from highest to lowest and truncated to at most `n` rows. Fewer than `n`
#'   rows are returned when the stage has fewer members.
digi_select <- function(digimon, stage, power_col, n = 3) {
  # which() discards NA comparisons, so rows with a missing Stage are not
  # carried along as all-NA rows the way logical subsetting would.
  in_stage <- digimon[which(digimon$Stage == stage), , drop = FALSE]
  # [[ ]] extracts a plain vector for data frames and tibbles alike.
  ranked <- in_stage[order(-in_stage[[power_col]]), , drop = FALSE]
  # head() stops at the rows available; indexing with 1:n would pad the
  # result with NA rows (and would return row 1 for n = 0).
  head(ranked, n)
}
digi_stages <- as.list(unique(digimon_list$Stage))

# Indices of the power columns, i.e. those whose name starts with "Lv"
# (e.g. Lv.50.HP).
power_cols <- grep("^Lv", colnames(digimon_list))

# For every power column, pick the single strongest Digimon of each stage with
# digi_select() and tag those rows with the name of the power they were ranked
# by. The list is left unnamed so that rbind() keeps plain row numbers.
digi_list <- lapply(power_cols, function(i) {
  top_per_stage <- lapply(
    digi_stages,
    function(stage) digi_select(digimon_list, stage, power_col = i, n = 1)
  )
  do.call(rbind, top_per_stage) %>%
    mutate(Power = colnames(digimon_list)[i])
})

# One row per (power column, stage) pair.
digi_teams <- data.frame(do.call(rbind, digi_list))
head(digi_teams)
##   Number       Digimon       Stage    Type Attribute Memory Equip.Slots
## 1      2       Pabumon        Baby    Free   Neutral      2           0
## 2     13       Yokomon In-Training    Free     Plant      3           0
## 3     51       Lucemon      Rookie Vaccine     Light     14           1
## 4     60       Woodmon    Champion   Virus     Plant      6           2
## 5    134 ShogunGekomon    Ultimate   Virus     Water     12           2
## 6    179     Gankoomon        Mega    Data      Fire     22           1
##   Lv.50.HP Lv50.SP Lv50.Atk Lv50.Def Lv50.Int Lv50.Spd    Power
## 1      950      62       76       76       69       68 Lv.50.HP
## 2     1040      64       82       82       75       69 Lv.50.HP
## 3     1230     148       59      104      208      119 Lv.50.HP
## 4     1480      74      109      103       89       88 Lv.50.HP
## 5     1980      96      113      113       99       97 Lv.50.HP
## 6     2080      90      188      163      109      138 Lv.50.HP

Which Digimon is most frequently chosen when trying to make a team that consists of the most powerful Digimon in each stage?

# Count how often each Digimon is the strongest of its stage across the power
# columns, then attach its Stage, Type and Attribute for context.
digi_teams %>%
  group_by(Digimon) %>%
  summarise(count = n()) %>%
  arrange(-count) %>%
  # One key name is enough; repeating it is redundant and may be rejected by
  # newer dplyr releases.
  left_join(digimon_list, by = "Digimon") %>%
  select(Digimon, count, Stage, Type, Attribute) %>%
  # Share of the ranked power columns (six Lv.50 stats, not five) that the
  # Digimon wins within its stage.
  mutate(
    perc = paste0(round(count / n_distinct(digi_teams$Power) * 100, 1), "%")
  )
## # A tibble: 37 x 6
##    Digimon      count Stage       Type    Attribute perc 
##    <chr>        <int> <chr>       <chr>   <chr>     <chr>
##  1 Lucemon          4 Rookie      Vaccine Light     66.7%
##  2 Magnamon         4 Armor       Free    Earth     66.7%
##  3 Chaosmon         2 Ultra       Vaccine Neutral   33.3%
##  4 Koromon          2 In-Training Free    Fire      33.3%
##  5 Lucemon SM       2 Ultra       Virus   Dark      33.3%
##  6 Nyaromon         2 In-Training Free    Light     33.3%
##  7 Poyomon          2 Baby        Free    Neutral   33.3%
##  8 Bakemon          1 Champion    Virus   Dark      16.7%
##  9 Barbamon         1 Mega        Virus   Dark      16.7%
## 10 Belphemon RM     1 Ultra       Virus   Dark      16.7%
## # ... with 27 more rows

Lucemon is the most powerful Rookie-stage Digimon on 4 out of the 6 power attributes.

Magnamon is the most powerful Armor-stage Digimon on 4 out of the 6 power attributes.

Based on the Digimon team that contains the most powerful Digimon from each stage by power, what is the most common Digimon Type?

# Stacked bar per power attribute showing how the stage winners split across
# Digimon Types; each segment is labelled with its share of the team of 8.
digi_teams %>%
  group_by(Power, Type) %>%
  summarise(count = n()) %>%
  ggplot(
    data = .,
    mapping = aes(
      x = Power,
      y = count,
      fill = Type,
      label = paste0(count / 8 * 100, "%")
    )
  ) +
  geom_col() +
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = c("#4477AA", "#BBBBBB", "#EE6677", "#CCBB44")) +
  labs(
    x = "Power Attribute",
    title = "Frequency of Digimon Types - ",
    subtitle = "Team of 8 with Most Powerful Digimon from Each Stage"
  ) +
  # Strip the y axis, grid and backgrounds so only bars and labels remain.
  theme(
    legend.position = "bottom",
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank()
  )