class: center, middle, inverse, title-slide

.title[
# Geospatial Analysis with R
]
.subtitle[
## Class 13
]

---

## Election Results

<img src="class13_files/figure-html/unnamed-chunk-1-1.png" style="display: block; margin: auto;" />

---

```r
library(tidyverse)
library(rvest)

# Scrape the first HTML table on the House party-divisions page
URL <- "https://history.house.gov/Institution/Party-Divisions/Party-Divisions/"
webpage <- read_html(URL)
cong <- as_tibble(html_table(webpage)[[1]])

# Scraped column names and the short names that replace them (same order)
onms <- colnames(cong)
newnms <- c("congress", "seats", "D", "R", "other", "delres")

cong <- cong %>%
  # start one row past the first row whose `Anti-Administration` value
  # begins with "Republican"
  slice((which(grepl("^Republican", `Anti-Administration`))[1] + 1):nrow(.)) %>%
  # drop rows where `Congress (Years)` equals the table's first column name
  # (presumably header rows repeated inside the table body)
  filter(`Congress (Years)` != colnames(cong)[1]) %>%
  # NOTE(review): pattern kept verbatim; the bare `)` alternatives look like
  # they may have lost their escaping -- confirm it still reduces the label
  # to a single year
  mutate(year = gsub("(*.*-)|(*.*–)|)|)2", "", `Congress (Years)`)) %>%
  # shift the extracted year back by two
  mutate(year = as.numeric(year) - 2) %>%
  # rename only the original scraped columns (not the new `year` column)
  rename_with(~newnms, all_of(onms)) %>%
  select(-other, -delres) %>%
  # keep the first three characters of `seats` before converting to numeric
  mutate(seats = substr(seats, 1, 3)) %>%
  mutate(across(c(seats, D, R), as.numeric)) %>%
  # normalized difference between D and R seat counts
  mutate(swing = (D - R) / (D + R))

cong %>%
  ggplot() +
  geom_line(aes(year, swing)) +
  geom_hline(yintercept = 0, lty = 2, col = "red") +
  xlab("") +
  ylab("(D - R) / (D + R)") +
  ggtitle("Normalized Party Control Index for US House") +
  scale_x_continuous(breaks = seq(1859, 2019, 10), expand = c(0, 2)) +
  # the complete theme must come first: adding it after theme() would
  # replace the axis-text tweak below
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
```

---

## Working with data, continued

### `do.call` / `bind_rows`

```r
# Write three small example datasets (one per group) to the session tempdir
sz <- 100
for (i in 1:3) {
  set.seed(i)  # a different, reproducible seed for each dataset
  d <- data.frame(
    id = seq_len(sz),
    v1 = runif(sz, min = 2, max = 12),
    grp = paste0("g", i)
  ) %>%
    mutate(v2 = v1 + rnorm(sz, mean = 2, sd = 2)) %>%
    select(id, v1, v2, grp)
  readr::write_csv(
    d, file = file.path(tempdir(), paste0("dataset", i, ".csv"))
  )
}
# ggplot(d) + geom_point(aes(x = v1, y = v2))

# Read the files back and stack them into one data frame.
# The pattern is anchored and the dot escaped so only dataset*.csv matches.
fs <- list.files(tempdir(), pattern = "^dataset.*\\.csv$", full.names = TRUE)
# dat <- do.call(rbind, lapply(fs, readr::read_csv))
dat <- bind_rows(lapply(fs, readr::read_csv))
plot(dat$v1, dat$v2)
```

---

### Manipulating and analyzing data

- reshape
- mutate
- select
- joins
- split-apply-combine
- plotting
- regression

---

## Reshape

```r
# Wide format: spread both v1 and v2 across the groups
dat %>%
  pivot_wider(names_from = grp, values_from = v1:v2)

# Wide format for v2 only
dat %>%
  select(-v1) %>%
  pivot_wider(names_from = grp, values_from = v2)

dat_wide <- dat %>%
  select(id, v2, grp) %>%
  pivot_wider(names_from = grp, values_from = v2)

# Back to long format, sorted by group
dat_long <- dat_wide %>%
  pivot_longer(g1:g3, names_to = "grp", values_to = "v2") %>%
  arrange(grp)

# Same reshape, but without the sort
dat_long2 <- dat_wide %>%
  pivot_longer(g1:g3, names_to = "grp", values_to = "v2")

# Column-binding pairs rows purely by position
cbind(dat, dat_long) %>% head()
bind_cols(dat, dat_long)
```

---

## Joins

```r
# Position-based binding with the unsorted long table
cbind(dat, dat_long2) %>% head()
# bind_cols(dat, dat_long)

# Join on all shared columns (no `by` given)
dat %>%
  left_join(dat_long2)

# Explicit keys, with the group column named differently in each table
dat_long2 %>%
  rename(grp2 = grp) %>%
  select(id, grp2, v2) %>%
  left_join(dat, ., by = c("id", "v2", "grp" = "grp2"))

# Drop v2 from the left table and bring it in from the right table,
# matching on id and group only
dat_long2 %>%
  rename(grp2 = grp) %>%
  select(id, grp2, v2) %>%
  left_join(dat %>% select(-v2), ., by = c("id", "grp" = "grp2")) %>%
  select(id:v1, v2, grp)
```

- Note: understand the differences between `full_join`, `inner_join`, `right_join`, and `left_join`.