The dataset used in the report is the England’s Immigrants 1330 - 1550 dataset, which contains over 64,000 names of people known to have migrated to England during the period of the Hundred Years’ War and the Black Death, the Wars of the Roses and the Reformation.
library(tidyverse)
library(sp)
library(splitstackshape)
library(rnaturalearth)
library(geosphere)
library(RColorBrewer)
library(leaflet)
library(leaflet.extras)
General data cleansing and creating relevant fields
e.g.
Looking at the dataset, we can create two new features that are the sums of the wealth and payable features (wealth_total_l, wealth_total_s, wealth_total_d; and wealth_payable_l, wealth_payable_s, wealth_payable_d, respectively), each expressed in a single unit (pence). This is possible because the amounts are recorded in £sd (pronounced "L-s-d"), the pre-decimal system of pounds, shillings and pence that was common throughout Europe.
According to Retrowow, there were 12 pence in a shilling and 20 shillings in a pound. Therefore, there were 240 pennies in a pound.
# Load the England's Immigrants search results and prepare them for analysis:
#   1. read the CSV from the data directory held in `wd$data` (defined
#      elsewhere), treating empty and single-space strings as missing;
#   2. drop free-text and archival columns that the analysis does not use;
#   3. keep only records with at least one recorded wealth component;
#   4. treat the remaining missing wealth components as zero;
#   5. derive each person's total wealth in pence (`pennies_total`).
immigrants <- read.csv(
  file.path(wd$data, "englands_immigrants_search_results.csv"),
  stringsAsFactors = FALSE,
  sep = ",",
  na.strings = c(NA, "", " "),
  skipNul = TRUE
) %>%
  as_tibble() %>%
  dplyr::select(-c(
    page_number, person_record_content, person_notes, biographical_notes,
    aliases, document_iddocument, archive, reference, part,
    content_start_date, content_start_date_type,
    content_end_date, content_end_date_type, document_notes
  )) %>%
  filter(
    !is.na(wealth_total_l) | !is.na(wealth_total_s) | !is.na(wealth_total_d)
  ) %>%
  # A partially recorded amount (e.g. pounds only) has its blanks set to zero.
  replace_na(list(wealth_total_l = 0, wealth_total_s = 0, wealth_total_d = 0)) %>%
  # 1 pound (l) = 20 shillings = 240 pence; 1 shilling (s) = 12 pence (d).
  # The pound term must therefore be scaled by 240 (not 20) so that all three
  # components are added in the same unit.
  mutate(
    pennies_total = (wealth_total_l * 240) + (wealth_total_s * 12) + wealth_total_d
  )
# Bar chart of total recorded wealth (in pence) per gender.
# The per-denomination sums and the head count are computed alongside the
# plotted total but are not mapped to any aesthetic.
immigrants %>%
  group_by(gender) %>%
  summarise(
    wealth_l = sum(wealth_total_l, na.rm = TRUE),
    wealth_s = sum(wealth_total_s, na.rm = TRUE),
    wealth_d = sum(wealth_total_d, na.rm = TRUE),
    wealth_lsd = sum(pennies_total, na.rm = TRUE),
    Total_count = n()
  ) %>%
  ggplot(mapping = aes(x = gender, y = wealth_lsd)) +
  # geom_col() draws bar heights straight from the y values
  geom_col()
# Head count and total wealth (pence) for every gender / document-type pair,
# wealthiest combination first.
immigrants %>%
  group_by(gender, document_type) %>%
  summarise(
    count = n(),
    wealth_lsd = sum(pennies_total, na.rm = TRUE)
  ) %>%
  arrange(-wealth_lsd)
## # A tibble: 4 x 4
## # Groups: gender [3]
## gender document_type count wealth_lsd
## <chr> <chr> <int> <dbl>
## 1 Male tax assessment 1244 396912
## 2 Female tax assessment 6 1920
## 3 Unknown tax assessment 4 960
## 4 Male letters of denization 1 400
# Head count and total wealth (pence) for every gender / social-status pair,
# wealthiest combination first.
immigrants %>%
  group_by(gender, social_statuses) %>%
  summarise(
    count = n(),
    wealth_lsd = sum(pennies_total, na.rm = TRUE)
  ) %>%
  arrange(-wealth_lsd)
## # A tibble: 4 x 4
## # Groups: gender [3]
## gender social_statuses count wealth_lsd
## <chr> <chr> <int> <dbl>
## 1 Male <NA> 1245 397312
## 2 Female <NA> 3 1200
## 3 Unknown <NA> 4 960
## 4 Female widow 3 720
# Split the comma-separated `origin_region_modern` field into one column per
# listed region (origin_region_modern_1, origin_region_modern_2, ...).
immigrants <- cSplit(immigrants, "origin_region_modern", ",")

# For records with both a first-listed origin and a county of residence, count
# how many people share each (origin, residence, individual wealth) combination
# and list the wealthiest combinations first.
immigrants1 <- immigrants %>%
  filter(!is.na(origin_region_modern_1), !is.na(residence_county)) %>%
  group_by(origin_region_modern_1, residence_county, pennies_total) %>%
  summarise(Number_of_People = n()) %>%
  arrange(desc(pennies_total)) %>%
  rename(
    Origin = origin_region_modern_1,
    Residence = residence_county,
    Wealth = pennies_total
  )

head(immigrants1)
## # A tibble: 6 x 4
## # Groups: Origin, Residence [5]
## Origin Residence Wealth Number_of_People
## <fct> <chr> <dbl> <int>
## 1 France Devon 1200 2
## 2 France Somerset 1200 1
## 3 Germany Devon 1200 1
## 4 Germany Kent 800 1
## 5 France Cornwall 720 1
## 6 France Devon 720 2
# Total immigrant wealth (pence) per county of residence, richest county first.
# Each row of `immigrants1` holds ONE person's wealth value (`Wealth`) together
# with the number of people sharing that value (`Number_of_People`), so the
# wealth must be weighted by the head count; summing `Wealth` alone would count
# every group of several people only once.
immigrants2 <- immigrants1 %>%
  ungroup() %>%
  group_by(Residence) %>%
  summarise(Total_wealth = sum(Wealth * Number_of_People)) %>%
  arrange(desc(Total_wealth))

head(immigrants2)
## # A tibble: 6 x 2
## Residence Total_wealth
## <chr> <dbl>
## 1 Devon 7240
## 2 Kent 4100
## 3 Suffolk 3760
## 4 Cornwall 3240
## 5 Dorset 3012
## 6 Essex 3000
# Country-level lookup: one representative point per country, taken from
# sp::coordinates() (column 1 stored as longitude, column 2 as latitude).
countries <- ne_countries()
countries$longitude <- coordinates(countries)[, 1]
countries$latitude <- coordinates(countries)[, 2]
countries_xy <- dplyr::select(
  countries@data,
  admin,
  countries_longitude = longitude,
  countries_latitude = latitude
)

# Subdivision-level lookup for ISO code "GB", using the longitude/latitude
# columns already present in the attribute table.
counties <- ne_states(iso_a2 = "GB")
counties_xy <- dplyr::select(
  counties@data,
  name,
  counties_longitude = longitude,
  counties_latitude = latitude
)

# Attach origin-country and residence-county coordinates to every flow.
# Rows whose names have no match in the lookups keep NA coordinates.
df3 <- immigrants1 %>%
  left_join(countries_xy, by = c("Origin" = "admin")) %>%
  left_join(counties_xy, by = c("Residence" = "name"))
#' Keep only the rows that are complete in the given columns
#'
#' @param data A data frame (or matrix) to filter.
#' @param desiredCols Column names or indices that must all be non-missing
#'   for a row to be kept; other columns may still contain `NA`.
#' @return `data` restricted to the rows with no `NA` in `desiredCols`.
completeFun <- function(data, desiredCols) {
  rows_ok <- complete.cases(data[, desiredCols])
  data[rows_ok, ]
}
# Keep only flows for which every column, including both coordinate pairs,
# is present.
df4 <- df3[complete.cases(df3), ]

# One line per flow, running from the origin-country point to the
# residence-county point (each given as longitude, latitude), returned as a
# spatial object with the start and end points included.
flows <- gcIntermediate(
  df4[, c("countries_longitude", "countries_latitude")],
  df4[, c("counties_longitude", "counties_latitude")],
  sp = TRUE,
  addStartEnd = TRUE
)

# Carry the attributes used for styling and labelling over to the lines.
flows$wealth <- df4$Wealth
flows$origins <- df4$Origin
flows$destinations <- df4$Residence

# Hover text, e.g. "France to Devon: 1200 lsd".
hover <- paste0(
  flows$origins, " to ",
  flows$destinations, ": ",
  as.character(flows$wealth), " lsd"
)
# Categorical palette: one colour per origin country.
pal <- colorFactor(brewer.pal(4, "Set1"), flows$origins)

# Binned palette over the per-county wealth totals (not used by the map below).
bins <- c(0, 240, 720, 1320, 3006, Inf)
pal_uk <- colorBin("YlOrRd", domain = immigrants2$Total_wealth, bins = bins)

# Interactive flow map: one line per origin -> residence flow, coloured by
# origin, with thickness proportional to wealth and a toggle per origin.
leaflet(flows) %>%
  setView(lng = -2, lat = 54.5, zoom = 5) %>%
  addProviderTiles("CartoDB.DarkMatter") %>%
  addLegend(pal = pal, values = ~origins) %>%
  addPolylines(
    weight = ~wealth / 80,
    label = hover,
    group = ~origins,
    color = ~pal(origins),
    labelOptions = labelOptions(textsize = "15px")
  ) %>%
  addLayersControl(
    overlayGroups = unique(flows$origins),
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addResetMapButton() %>%
  addFullscreenControl()