Dataset


The dataset used in the report is the England’s Immigrants 1330 - 1550 dataset, which contains over 64,000 names of people known to have migrated to England during the period of the Hundred Years’ War and the Black Death, the Wars of the Roses and the Reformation.

library(tidyverse)
library(sp)
library(splitstackshape) 
library(rnaturalearth)
library(geosphere)
library(RColorBrewer)
library(leaflet)
library(leaflet.extras)

Loading


General data cleansing and creating relevant fields

e.g.

Looking at the dataset, we can create two new features: one that combines the three wealth columns (wealth_total_l, wealth_total_s, wealth_total_d) into a single total, and one that does the same for the three payable columns (wealth_payable_l, wealth_payable_s, wealth_payable_d). The amounts are split across three columns because £sd (pronounced Lsd) is the name for the pre-decimal currencies that were common throughout Europe, in which sums were recorded in pounds (l), shillings (s) and pence (d).

According to Retrowow, there were 12 pence in a shilling and 20 shillings in a pound. Therefore, there were 240 pennies in a pound.

# Load the England's Immigrants search results and derive each person's total
# recorded wealth in pence.
#
# Steps:
#   1. Read the CSV from the data directory held in `wd$data`, treating empty
#      and single-space cells as missing.
#   2. Drop the page, notes, alias and document-metadata columns listed below.
#   3. Keep only records that have at least one wealth component recorded.
#   4. Replace the remaining missing components with 0 so they can be summed.
#   5. Convert pounds / shillings / pence into pence.
#
# FIX: pounds were previously multiplied by 20 (shillings per pound) and then
# added to pence, which mixed units. One pound is 20 shillings of 12 pence,
# i.e. 240 pence. Printed summaries produced with the old multiplier need to
# be regenerated by re-running the report.
immigrants <- read.csv(
  file.path(wd$data, "englands_immigrants_search_results.csv"),
  stringsAsFactors = FALSE,
  sep = ",",
  na.strings = c(NA, "", " "),
  skipNul = TRUE
) %>%
  as_tibble() %>%
  dplyr::select(-c(
    page_number, person_record_content, person_notes, biographical_notes,
    aliases, document_iddocument, archive, reference, part,
    content_start_date, content_start_date_type,
    content_end_date, content_end_date_type, document_notes
  )) %>%
  filter(
    !is.na(wealth_total_l) | !is.na(wealth_total_s) | !is.na(wealth_total_d)
  ) %>%
  mutate(
    wealth_total_l = ifelse(is.na(wealth_total_l), 0, wealth_total_l),
    wealth_total_s = ifelse(is.na(wealth_total_s), 0, wealth_total_s),
    wealth_total_d = ifelse(is.na(wealth_total_d), 0, wealth_total_d)
  ) %>%
  # 240 pence per pound, 12 pence per shilling.
  mutate(
    pennies_total = (wealth_total_l * 240) + (wealth_total_s * 12) + wealth_total_d
  )

Data Exploration


# Bar chart of total recorded wealth (pennies_total summed) per gender.
# The per-denomination sums and the record count are computed alongside it
# but only `wealth_lsd` is plotted.
immigrants %>%
  group_by(gender) %>%
  summarise(
    wealth_l    = sum(wealth_total_l, na.rm = TRUE),
    wealth_s    = sum(wealth_total_s, na.rm = TRUE),
    wealth_d    = sum(wealth_total_d, na.rm = TRUE),
    wealth_lsd  = sum(pennies_total, na.rm = TRUE),
    Total_count = n()
  ) %>%
  ggplot(mapping = aes(x = gender, y = wealth_lsd)) +
  # geom_col() is the bar geom that uses the y values as given.
  geom_col()

# Record count and summed wealth for each gender / document type pair,
# largest wealth total first.
immigrants %>%
  group_by(gender, document_type) %>%
  summarise(
    count      = n(),
    wealth_lsd = sum(pennies_total, na.rm = TRUE)
  ) %>%
  arrange(desc(wealth_lsd))
## # A tibble: 4 x 4
## # Groups:   gender [3]
##   gender  document_type         count wealth_lsd
##   <chr>   <chr>                 <int>      <dbl>
## 1 Male    tax assessment         1244     396912
## 2 Female  tax assessment            6       1920
## 3 Unknown tax assessment            4        960
## 4 Male    letters of denization     1        400
# Record count and summed wealth for each gender / social status pair,
# largest wealth total first.
immigrants %>%
  group_by(gender, social_statuses) %>%
  summarise(
    count      = n(),
    wealth_lsd = sum(pennies_total, na.rm = TRUE)
  ) %>%
  arrange(desc(wealth_lsd))
## # A tibble: 4 x 4
## # Groups:   gender [3]
##   gender  social_statuses count wealth_lsd
##   <chr>   <chr>           <int>      <dbl>
## 1 Male    <NA>             1245     397312
## 2 Female  <NA>                3       1200
## 3 Unknown <NA>                4        960
## 4 Female  widow               3        720
# Split the comma-separated modern origin regions into numbered columns
# (origin_region_modern_1, ...); only the first one is used below.
immigrants <- cSplit(immigrants, splitCols = "origin_region_modern", sep = ",")

# Count people for every (origin, county of residence, wealth) combination
# where both places are known, richest combinations first.
immigrants1 <- immigrants %>%
  filter(!is.na(origin_region_modern_1), !is.na(residence_county)) %>%
  group_by(origin_region_modern_1, residence_county, pennies_total) %>%
  summarise(Number_of_People = n()) %>%
  arrange(desc(pennies_total)) %>%
  rename(
    Origin    = origin_region_modern_1,
    Residence = residence_county,
    Wealth    = pennies_total
  )

head(immigrants1)
## # A tibble: 6 x 4
## # Groups:   Origin, Residence [5]
##   Origin  Residence Wealth Number_of_People
##   <fct>   <chr>      <dbl>            <int>
## 1 France  Devon       1200                2
## 2 France  Somerset    1200                1
## 3 Germany Devon       1200                1
## 4 Germany Kent         800                1
## 5 France  Cornwall     720                1
## 6 France  Devon        720                2
# Sum the Wealth column of immigrants1 per county of residence, largest total
# first. as.data.frame() discards the grouping carried over from immigrants1
# before regrouping by Residence alone.
immigrants2 <- immigrants1 %>%
  as.data.frame() %>%
  group_by(Residence) %>%
  summarise(
    Total_wealth = sum(Wealth)
  ) %>%
  arrange(desc(Total_wealth))

head(immigrants2)
## # A tibble: 6 x 2
##   Residence Total_wealth
##   <chr>            <dbl>
## 1 Devon             7240
## 2 Kent              4100
## 3 Suffolk           3760
## 4 Cornwall          3240
## 5 Dorset            3012
## 6 Essex             3000

The Flow of Wealth - Interactive Map


# --- Origin side: one coordinate pair per country -------------------------
countries <- ne_countries()

# coordinates() returns a two-column matrix; column 1 is stored as longitude
# and column 2 as latitude.
countries$longitude <- coordinates(countries)[, 1]
countries$latitude <- coordinates(countries)[, 2]

countries_xy <- countries@data %>%
  dplyr::select(
    admin,
    countries_longitude = longitude,
    countries_latitude  = latitude
  )

# --- Destination side: one coordinate pair per GB state/county ------------
counties <- ne_states(iso_a2 = "GB")

# The longitude / latitude columns already present in the attribute table
# are reused here.
counties_xy <- counties@data %>%
  dplyr::select(
    name,
    counties_longitude = longitude,
    counties_latitude  = latitude
  )

# --- Attach both coordinate pairs to every origin -> residence row --------
df3 <- immigrants1 %>%
  left_join(countries_xy, by = c("Origin" = "admin")) %>%
  left_join(counties_xy, by = c("Residence" = "name"))

# Keep only the rows of `data` that have no missing value in `desiredCols`.
#
# data        : a data frame.
# desiredCols : column names or indices to check for NA.
# Returns `data` with all of its columns, restricted to the complete rows.
completeFun <- function(data, desiredCols) {
  data[complete.cases(data[, desiredCols]), ]
}

# Keep only rows where every column is present, which includes both
# coordinate pairs added by the joins that build df3.
df4 <- df3[complete.cases(df3), ]

# Great-circle paths from each origin country to each county of residence.
# The coordinate columns are selected by name rather than by position
# (previously 5:6 and 7:8) so that adding or reordering upstream columns
# cannot silently feed the wrong columns to gcIntermediate().
flows <- gcIntermediate(
  df4[, c("countries_longitude", "countries_latitude")],
  df4[, c("counties_longitude", "counties_latitude")],
  sp = TRUE,
  addStartEnd = TRUE
)

# Attach the attributes used for styling and labelling the lines.
flows$wealth <- df4$Wealth

flows$origins <- df4$Origin

flows$destinations <- df4$Residence

# Hover text for each line: "<origin> to <destination>: <wealth> lsd".
hover <- paste0(flows$origins, " to ",
                flows$destinations, ': ',
                as.character(flows$wealth), " lsd")

# Categorical palette: one colour per flow origin, built from four Set1 colours.
pal <- colorFactor(
  palette = brewer.pal(n = 4, name = "Set1"),
  domain = flows$origins
)

# Binned palette over the per-county wealth totals in immigrants2.
bins <- c(0, 240, 720, 1320, 3006, Inf)
pal_uk <- colorBin(
  palette = "YlOrRd",
  domain = immigrants2$Total_wealth,
  bins = bins
)

# Interactive map of the flow lines on a dark basemap.
# Line colour follows the origin, line weight is wealth / 80, and each origin
# is its own overlay group that can be toggled from the layers control.
leaflet(data = flows) %>%
  setView(lng = -2, lat = 54.5, zoom = 5) %>%
  addProviderTiles("CartoDB.DarkMatter") %>%
  addLegend(pal = pal, values = ~origins) %>%
  addPolylines(
    weight = ~wealth / 80,
    label = hover,
    group = ~origins,
    color = ~pal(origins),
    labelOptions = labelOptions(textsize = "15px")
  ) %>%
  addLayersControl(
    overlayGroups = unique(flows$origins),
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addResetMapButton() %>%
  addFullscreenControl()