library(tidyverse)
library(rvest)

URL <- "https://history.house.gov/Institution/Party-Divisions/Party-Divisions/"
webpage <- read_html(URL)
cong <- as_tibble(html_table(webpage)[[1]])
onms <- colnames(cong)
newnms <- c("congress", "seats", "D", "R", "other", "delres")
cong <- cong %>% 
  slice((which(grepl("^Republican", `Anti-Administration`))[1] + 1):nrow(.)) %>%
  filter(`Congress (Years)` != colnames(cong)[1]) %>% 
  mutate(year = gsub("(*.*-)|(*.*–)|)|)2", "", `Congress (Years)`)) %>%
  mutate(year = as.numeric(year) - 2) %>% 
  rename_at(vars(onms), ~newnms) %>% select(-other, -delres) %>%
  mutate(seats = substr(seats, 1, 3)) %>% 
  mutate_at(.vars = vars(seats, D, R), as.numeric) %>% 
  mutate(swing = (D - R) / (D + R)) 
cong %>%  
  ggplot() + geom_line(aes(year, swing)) + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + 
  geom_hline(yintercept = 0, lty = 2, col = "red") + 
  xlab("") + ylab("(D - R) / (D + R)") + 
  ggtitle("Normalized Party Control Index for US House") + theme_linedraw() + 
  scale_x_continuous(breaks = seq(1859, 2019, 10), expand = c(0, 2))

Today

  • Finish up:
    • split-apply-combine
    • plotting
    • regression
  • Intro to spatial
library(sf)
library(dplyr)
sz <- 100
dat <- lapply(1:3, function(x) {
  set.seed(x)
  d <- data.frame(
    id = 1:sz,
    v1 = runif(sz, min = 2, max = 12),
    grp = paste0("g", x)
  ) %>% mutate(v2 = v1 + rnorm(sz, mean = 2, sd = 2)) %>% 
    select(id, v1, v2, grp)
}) %>% bind_rows()
plot(dat$v1, dat$v2)

SAC

tidyverse

dat %>% 
  group_by(grp) %>% 
  # summarize(v2 = mean(v2)) %>%
  # summarize(across(v1:v2, mean)) %>%
  summarize(across(v1:v2, list("mu" = mean, "sd" = sd)))

base

t(sapply(unique(dat$grp), function(x) {
  colMeans(dat[dat$grp == x, c("v1", "v2")])
}))

Graphical SAC

ggplot(dat) + 
  geom_point(aes(x = v1, y = v2, color = grp))
ggplot(dat) + 
  geom_point(aes(x = v1, y = v2)) + 
  facet_wrap(~grp)
  # facet_wrap(~grp, nrow = 2, ncol = 2)
par(mfrow = c(1, 3))
for(i in unique(dat$grp)) {
  plot(v2 ~ v1, data = dat[dat$grp == i, ])
}

Regression

Base

reg <- lm(v2 ~ v1, data = dat)
summary(reg)

plot(dat$v1, dat$v2)
abline(reg, col = "red")

ggplot

ggplot(dat) + 
  geom_point(aes(x = v1, y = v2)) + 
  geom_smooth(aes(x = v1, y = v2), method = "lm")

SAC + Regression

# ? thoughts
ggplot(dat) + 
  geom_point(aes(x = v1, y = v2, color = grp)) + 
  geom_smooth(aes(x = v1, y = v2), method = "lm") + 
  # geom_smooth(aes(x = v1, y = v2, color = grp), method = "lm") #+
  facet_wrap(~grp)

Spatial data

Reading

  • Create spatial points from csv
library(tidyverse)
library(sf)
## read in csv
farmers <- system.file("extdata/farmer_spatial.csv", package = "geospaar") %>% 
  read_csv() 

## convert tibble to 'sf'
farmers_sf <- st_as_sf(farmers, coords = c("x", "y"), crs = 4326)

Reading

  • Reading geojsons
  • Both st_read and read_sf work
districts <- read_sf(system.file("extdata/districts.geojson", 
                                 package = "geospaar"))
roads <- st_read(system.file("extdata/roads.geojson", package = "geospaar"))
Reading layer `roads' from data source 
  `/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/geospaar/extdata/roads.geojson' 
  using driver `GeoJSON'
Simple feature collection with 473 features and 1 field
Geometry type: MULTILINESTRING
Dimension:     XY
Bounding box:  xmin: -292996.9 ymin: -2108848 xmax: 873238.8 ymax: -1008785
Projected CRS: Africa_Albers_Equal_Area_Conic

Writing

  • st_write works as expected
  • Use ‘.geojson’ (only one file)
#st_write(districts, "districts_out.shp")
st_write(districts, "districts2.geojson")

Projections (crs)

  • Geographic coordinates are: 4326
st_crs(farmers_sf) <- 4326
farmers_sf %>% st_crs()
Coordinate Reference System:
  User input: EPSG:4326 
  wkt:
GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        MEMBER["World Geodetic System 1984 (G2296)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    USAGE[
        SCOPE["Horizontal component of 3D system."],
        AREA["World."],
        BBOX[-90,-180,90,180]],
    ID["EPSG",4326]]
# or
farmers_sf %>% st_set_crs(NA)
Simple feature collection with 20984 features and 5 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 24.777 ymin: -18.222 xmax: 33.332 ymax: -8.997
CRS:           NA
# A tibble: 20,984 × 6
   uuid     date       season rained planted         geometry
 * <chr>    <date>      <dbl>  <dbl>   <dbl>          <POINT>
 1 009a8424 2015-11-15      1      0       0 (27.256 -16.926)
 2 00df166f 2015-11-15      1      0       0 (26.942 -16.504)
 3 02671a00 2015-11-15      1      0       0 (27.254 -16.914)
 4 03f4dcca 2015-11-15      1      0       0 (27.237 -16.733)
 5 042cf7b3 2015-11-15      1      0       0 (27.138 -16.807)
 6 05618404 2015-11-15      1      0       0 (26.875 -16.611)
 7 064beba0 2015-11-15      1      1       0 (26.752 -16.862)
 8 083a46a2 2015-11-15      1      0       0 (26.977 -16.765)
 9 08eb2224 2015-11-15      1      0       0 (26.912 -16.248)
10 0ab761d6 2015-11-15      1      0       0  (27.113 -16.95)
# ℹ 20,974 more rows
farmers_sf %>% st_set_crs(4326)
Simple feature collection with 20984 features and 5 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 24.777 ymin: -18.222 xmax: 33.332 ymax: -8.997
Geodetic CRS:  WGS 84
# A tibble: 20,984 × 6
   uuid     date       season rained planted         geometry
 * <chr>    <date>      <dbl>  <dbl>   <dbl>      <POINT [°]>
 1 009a8424 2015-11-15      1      0       0 (27.256 -16.926)
 2 00df166f 2015-11-15      1      0       0 (26.942 -16.504)
 3 02671a00 2015-11-15      1      0       0 (27.254 -16.914)
 4 03f4dcca 2015-11-15      1      0       0 (27.237 -16.733)
 5 042cf7b3 2015-11-15      1      0       0 (27.138 -16.807)
 6 05618404 2015-11-15      1      0       0 (26.875 -16.611)
 7 064beba0 2015-11-15      1      1       0 (26.752 -16.862)
 8 083a46a2 2015-11-15      1      0       0 (26.977 -16.765)
 9 08eb2224 2015-11-15      1      0       0 (26.912 -16.248)
10 0ab761d6 2015-11-15      1      0       0  (27.113 -16.95)
# ℹ 20,974 more rows

projections (use existing)

  • you can set projections based on another layer
  • but the two layers MUST have the same crs (e.g. geographic)
farmers <- read_csv(system.file("extdata/farmer_spatial.csv", 
                                package = "geospaar"))
farmers <- st_as_sf(farmers, coords = c("x", "y")) %>% 
  st_set_crs(st_crs(districts))

farmers %>% st_crs() ## print new crs
Coordinate Reference System:
  User input: WGS 84 
  wkt:
GEOGCRS["WGS 84",
    DATUM["World Geodetic System 1984",
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4326]]

projections (transform)

  • Use st_transform() to project from one crs to another
roads <- st_transform(x = roads, crs = st_crs(districts))
st_crs(roads)
Coordinate Reference System:
  User input: WGS 84 
  wkt:
GEOGCRS["WGS 84",
    DATUM["World Geodetic System 1984",
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4326]]

Features from scratch

pt <- st_point(x = c(28, -14))
pt
#> POINT (28 -14)
#
# #2
pts <- st_multipoint(x = cbind(x = c(27.5, 28, 28.5), y = c(-14.5, -15, -15.5)))
pts
#> MULTIPOINT ((27.5 -14.5), (28 -15), (28.5 -15.5))
#
# #3
sline <- st_linestring(cbind(x = c(27, 27.5, 28), y = c(-15, -15.5, -16)))
sline
#> LINESTRING (27 -15, 27.5 -15.5, 28 -16)
#
# #4
pol <- st_polygon(list(cbind(x = c(26.5, 27.5, 27, 26, 26.5), 
                             y = c(-15.5, -16.5, -17, -16, -15.5))))
pol

Plotting

  • sf’s generic plot() (use add = TRUE)
par(mar = c(0, 0, 0, 0))
plot(districts %>% st_geometry(), col = "grey")
plot(pt, add = TRUE, col = "red", pch = 20)
plot(pts, add = TRUE, col = "blue", pch = 20)
plot(sline, add = TRUE, col = "cyan", lwd = 2)
plot(pol, add = TRUE, col = "orange", lwd = 2)

Plotting (ggplot)

  • Use geom_sf()
ggplot(districts) + geom_sf() +
  geom_sf(data = roads, col = "red") + 
  geom_sf(data = farmers_sf, col = "blue")

Spatial continued

Data

districts <- st_read(system.file("extdata/districts.geojson", 
                                 package = "geospaar"))
farmers_sf <- read_csv(system.file("extdata/farmer_spatial.csv", 
                                   package = "geospaar"))
farmers_s <- st_as_sf(farmers, coords = c("x", "y")) %>% 
  st_set_crs(st_crs(districts))

roads <- st_read(system.file("extdata/roads.geojson", package = "geospaar")) %>% 
  st_transform(x = roads, crs = st_crs(districts))

pt <- st_point(x = c(28, -14))
pts <- st_multipoint(x = cbind(x = c(27.5, 28, 28.5), y = c(-14.5, -15, -15.5)))
sline <- st_linestring(cbind(x = c(27, 27.5, 28), y = c(-15, -15.5, -16)))
pol <- st_polygon(list(cbind(x = c(26.5, 27.5, 27, 26, 26.5), 
                             y = c(-15.5, -16.5, -17, -16, -15.5))))

Plotting (ggplot)

  • Use geom_sf()
ggplot(districts) + geom_sf() +
  geom_sf(data = roads, col = "red") + 
  geom_sf(data = farmers_sf, col = "blue")

Area and Length

  • st_area(), st_length. Default units are in meters
  • How many square m in a square km?
dist_areas <- districts %>% st_area()
dist_areas
dist_areas %>% units::set_units("km^2")

Add area as column

districts <- districts %>% 
  mutate(area = st_area(.) %>% units::set_units("km^2") %>% as.numeric()) 
districts

Plot districts (exercise)

  • Use geom_sf() to plot area
  • Fill districts based on area column.
  • Use scale_fill_continuous , with type = 'viridis'
ggplot(districts) + geom_sf(aes(fill = area)) + 
  scale_fill_continuous(type = "viridis")

Compare areas

districts <- districts %>% 
  st_transform("ESRI:102022") %>% 
  mutate(area2 = as.numeric(st_area(.) / 10000)) %>% 
  st_transform(4326)

ggplot(districts) + 
  geom_histogram(aes(x = area - area2))