class: center, middle, inverse, title-slide .title[ # Geospatial Analysis with R ] .subtitle[ ## Class 10 ] --- ```r library(terra) library(geospaar) # data(dem) topo <- lapply(c("TRI", "flowdir"), function(x) terrain(rast(dem), x)) png(here::here("docs/figures/zambia_topo.png"), width = 7, height = 2.5, res = 300, units = "in", bg = "grey") par(mfrow = c(1, 2))#, mar = c(0, 1, 2, 4), oma = c(0, 1, 0, 2)) plot(topo[[1]], mar = c(0.5, 1, 1, 4), main = "TRI", axes = FALSE) plot(topo[[2]], mar = c(0.5, 1, 1, 4), main = "Flow Direction", axes = FALSE) dev.off() ``` --- ## Data generation Create the following: - `dat`, a data.frame built from `V1`, `V2`, `V3`, and `V4`, where: - `V1` = 1:20 - `V2` is a random sample from 1:100 - `V3` is drawn from a random uniform distribution between 0 and 50 - `V4` is a random selection of the letters A-E - Use `set.seed(50)` - Do this all at once (i.e. wrap the creation of V1-V4 in the `data.frame` call, precede it with `set.seed()`) --- ## Exercises - Use a `for` loop to iterate over each row of `dat` and calculate its `sum` - Do the same with `lapply` and `sapply` - Do the same using `rowSums` - Select rows from `dat` containing the letter "E" in `V4`, and take the mean of values from the result in column `V3` --- ## In-class - Using `lapply`, create an output list of 3 `data.frame`s in which the first `data.frame` has its `V2` column multiplied by 5, the second `data.frame` has `V2` multiplied by 10, and the third has `V2` multiplied by 20. Use `dat` as the starting `data.frame`. - Make a `data.frame` called `dat2` that is a copy of `dat`. Update `dat2` so that its `V3` is set to 40 wherever its values are > 40 --- ## Reading and writing data ## File paths Let's read in a CSV a few different ways. Full path - clear for you, bad for code sharing. ```r data_tib <- read.csv( "/Users/lestes/Dropbox/teaching/geog246346/geospaar/inst/extdata/cdf_corn.csv" ) str(data_tib) ``` ``` ## 'data.frame': 5999 obs. 
of 20 variables: ## $ tenant : chr "planet" "planet" "planet" "planet" ... ## $ site_id : chr "UNLEN_CSP1_IMZ1" "UNLEN_CSP1_IMZ1" "UNLEN_CSP1_IMZ1" "UNLEN_CSP1_IMZ1" ... ## $ local : chr "2020-06-05 10:00:00-05:00" "2020-06-06 10:00:00-05:00" "2020-06-07 10:00:00-05:00" "2020-06-08 10:00:00-05:00" ... ## $ lat : num 41.2 41.2 41.2 41.2 41.2 ... ## $ long : num -96.5 -96.5 -96.5 -96.5 -96.5 ... ## $ site_group : chr "Lincoln" "Lincoln" "Lincoln" "Lincoln" ... ## $ Tair : num 27.4 25.5 29.6 27.7 26.7 ... ## $ Tabove : num 27.93 17.43 9.18 8.58 18.71 ... ## $ Tbelow : num 28 24.3 27 25.9 26.6 ... ## $ CGDD : int 345 363 381 393 402 414 425 439 455 471 ... ## $ b1r : num 0.098 0.052 0.0493 0.0484 0.0452 ... ## $ b2r : num 0.0822 0.0762 0.0739 0.0722 0.0676 ... ## $ b3r : num 0.2013 0.0911 0.0863 0.0848 0.0803 ... ## $ b4r : num 0.1447 0.1001 0.0991 0.0958 0.0899 ... ## $ b5r : num 0.0946 0.225 0.2378 0.246 0.2434 ... ## $ b6r : num 0.139 0.282 0.332 0.348 0.334 ... ## $ b7r : num 0.123 0.233 0.259 0.265 0.259 ... ## $ NDVI : num -0.0209 0.4754 0.54 0.5686 0.5755 ... ## $ CropType : chr "Corn" "Corn" "Corn" "Corn" ... ## $ PlantingDate: chr "04/20/2020" "04/20/2020" "04/20/2020" "04/20/2020" ... ``` --- ```r data_tib <- readr::read_csv( "/Users/lestes/Dropbox/teaching/geog246346/geospaar/inst/extdata/cdf_corn.csv" ) ``` ``` ## Rows: 5999 Columns: 20 ## ── Column specification ──────────────────────────────────────────────────────────────────────────── ## Delimiter: "," ## chr (5): tenant, site_id, site_group, CropType, PlantingDate ## dbl (14): lat, long, Tair, Tabove, Tbelow, CGDD, b1r, b2r, b3r, b4r, b5r, b6r, b7r, NDVI ## dttm (1): local ## ## ℹ Use `spec()` to retrieve the full column specification for this data. ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message. ``` ```r str(data_tib) ``` ``` ## spc_tbl_ [5,999 × 20] (S3: spec_tbl_df/tbl_df/tbl/data.frame) ## $ tenant : chr [1:5999] "planet" "planet" "planet" "planet" ... 
## $ site_id : chr [1:5999] "UNLEN_CSP1_IMZ1" "UNLEN_CSP1_IMZ1" "UNLEN_CSP1_IMZ1" "UNLEN_CSP1_IMZ1" ... ## $ local : POSIXct[1:5999], format: "2020-06-05 15:00:00" "2020-06-06 15:00:00" "2020-06-07 15:00:00" ... ## $ lat : num [1:5999] 41.2 41.2 41.2 41.2 41.2 ... ## $ long : num [1:5999] -96.5 -96.5 -96.5 -96.5 -96.5 ... ## $ site_group : chr [1:5999] "Lincoln" "Lincoln" "Lincoln" "Lincoln" ... ## $ Tair : num [1:5999] 27.4 25.5 29.6 27.7 26.7 ... ## $ Tabove : num [1:5999] 27.93 17.43 9.18 8.58 18.71 ... ## $ Tbelow : num [1:5999] 28 24.3 27 25.9 26.6 ... ## $ CGDD : num [1:5999] 345 363 381 393 402 414 425 439 455 471 ... ## $ b1r : num [1:5999] 0.098 0.052 0.0493 0.0484 0.0452 ... ## $ b2r : num [1:5999] 0.0822 0.0762 0.0739 0.0722 0.0676 ... ## $ b3r : num [1:5999] 0.2013 0.0911 0.0863 0.0848 0.0803 ... ## $ b4r : num [1:5999] 0.1447 0.1001 0.0991 0.0958 0.0899 ... ## $ b5r : num [1:5999] 0.0946 0.225 0.2378 0.246 0.2434 ... ## $ b6r : num [1:5999] 0.139 0.282 0.332 0.348 0.334 ... ## $ b7r : num [1:5999] 0.123 0.233 0.259 0.265 0.259 ... ## $ NDVI : num [1:5999] -0.0209 0.4754 0.54 0.5686 0.5755 ... ## $ CropType : chr [1:5999] "Corn" "Corn" "Corn" "Corn" ... ## $ PlantingDate: chr [1:5999] "04/20/2020" "04/20/2020" "04/20/2020" "04/20/2020" ... ## - attr(*, "spec")= ## .. cols( ## .. tenant = col_character(), ## .. site_id = col_character(), ## .. local = col_datetime(format = ""), ## .. lat = col_double(), ## .. long = col_double(), ## .. site_group = col_character(), ## .. Tair = col_double(), ## .. Tabove = col_double(), ## .. Tbelow = col_double(), ## .. CGDD = col_double(), ## .. b1r = col_double(), ## .. b2r = col_double(), ## .. b3r = col_double(), ## .. b4r = col_double(), ## .. b5r = col_double(), ## .. b6r = col_double(), ## .. b7r = col_double(), ## .. NDVI = col_double(), ## .. CropType = col_character(), ## .. PlantingDate = col_character() ## .. ) ## - attr(*, "problems")=<externalptr> ``` --- ## Working directory `"."` - Working directory. 
Use `getwd()` (from console) - Usually set to the project folder. ```r getwd() ## if in an RMD, this will show the folder of the RMD ``` ``` ## [1] "/Users/lestes/Dropbox/teaching/geog246346/geospaar/docs" ``` Use `.` to start a file path from the working directory ```r list.files(".") ## ``` ```r data_tib <- readr::read_csv("./inst/extdata/cdf_corn.csv") ``` - Use `".."` to go up one folder level ```r list.files(".") ## files in working directory list.files("..") ## files in folder one level up ``` --- ## User directory `"~"` - Set by an environment variable - Use the command below to see its value ```r path.expand("~") ``` ``` ## [1] "/Users/lestes" ``` ```r data_tib <- readr::read_csv( "~/Dropbox/teaching/geog246346/geospaar/inst/extdata/cdf_corn.csv" ) ``` --- ## Writing files - Use `write.csv` or `readr::write_csv` to write ```r readr::write_csv(data_tib, file = "temp.csv") ## by default writes to wd() ``` --- ## Saving/loading files - If you want to save an R object, like a `data.frame`, `tibble` etc. - Use `save` and the `.rda` extension ```r save(data_tib, file = "temp.rda") ## by default writes to wd() ``` ```r data_tib <- NULL load(file = "temp.rda") ## loads file back to environment ``` --- ## Dates with `lubridate` - The main function you want to use is `as_date`, which can convert a character to date format. ```r library(lubridate) date1 <- as_date("2022-03-01") ## date in standard YYYY-MM-DD format print(date1) ``` ``` ## [1] "2022-03-01" ``` --- ## Dates with `lubridate` - Parsing is more challenging with unclear date formats. ```r date2 <- as_date("3/1/22") ## is month or date first? ``` ``` ## Warning: All formats failed to parse. No formats found. ``` ```r date2 ``` ``` ## [1] NA ``` Include the format as shown below. 
See the [formats at this link](https://epirhandbook.com/en/working-with-dates.html) ```r date2 <- as_date("3/1/22", format = "%m/%d/%y" ) date2 ``` ``` ## [1] "2022-03-01" ``` We can also write dates in a desired format ```r date2_char <- as.character(date2, format = "%A %B %d, %Y") date2_char ``` ``` ## [1] "Tuesday March 01, 2022" ``` --- ## Date formats <img src="figures/class10_date_formats.png" width="100%" style="display: block; margin: auto;" /> --- How can we read in this date? ```r date3 <- as_date("Apr 3, 1999", format = "...") date3 ``` ``` ## [1] NA ```