Geospatial Analysis with R
Class 7
# Title-slide figure: read a raster with stars and save a plot of it as a PNG.
library(stars)
# glue::glue() joins the three pieces into a single path string.
# NOTE(review): "/vsis3/" looks like a GDAL virtual-filesystem prefix for S3 — confirm.
s3url <- glue::glue("/vsis3/activemapper/",
"planet/composite_sr_buf_fix/GS/",
"tile486317_736815_736967.tif") # not accessible
# NOTE(review): proxy = TRUE presumably defers reading pixel values — see ?read_stars.
p <- read_stars(s3url, proxy = TRUE)
# Open a PNG device (8 x 2.9 in, 300 dpi, white background), draw, then close it.
png(here::here("external/slides/figures/ghana_planet_stars.png"), height = 2.9,
width = 8, units = "in", res = 300, bg = "white")
plot(p)
dev.off() # close the device opened by png()
Today
- Building blocks of R: data types, functions, etc
- Useful functions
- Indexing and subsetting
- Control structures
The R Ecosystem
Common objects
![]()
Credit: L. Song
The R Ecosystem
Vectorized operations
print(v1 + 5) ## addition applied to each element in vector
[1] 6 7 8 9 10 11 12 13 14
print(v1 * 3) ## multiplication applied to each element in vector
[1] 3 6 9 12 15 18 21 24 27
Accessing elements in vector
[1] 3 6 9 12 15 18 21 24 27
v1 <- letters[1:7]
print(paste0("This letter is ", v1)) # paste0 concatenates strings
[1] "This letter is a" "This letter is b" "This letter is c" "This letter is d"
[5] "This letter is e" "This letter is f" "This letter is g"
Question: How would you say “Letter 1 is a”, “Letter 2 is b”, etc.?
Creating a matrix
# Two ways to build a matrix: reshape a vector, or bind columns together.
v1 <- 1:9
m1 <- matrix(v1, nrow = 3, byrow = TRUE) # fill row by row; write TRUE, since T can be reassigned
m2 <- cbind(v1 = 1:3, v2 = 1:3, v3 = 0:0) # 0:0 is a single 0, recycled down column v3
print(m1)
print(m2)
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 4 5 6
[3,] 7 8 9
v1 v2 v3
[1,] 1 1 0
[2,] 2 2 0
[3,] 3 3 0
Accessing elements in a matrix
m2 <- cbind(v1 = 1:3, v2 = 1:3, v3 = 0:0)
print(m2)
v1 v2 v3
[1,] 1 1 0
[2,] 2 2 0
[3,] 3 3 0
print("access by column name")
[1] "access by column name"
Question: Create a 3 row, 4 column matrix, where the first column is (1, 2, 3), second column is (4, 5, 6 ) etc.
Lists
Lists can mix objects of different types.
l <- list("a", 1, 0.5, TRUE)
print(l)
[[1]]
[1] "a"
[[2]]
[1] 1
[[3]]
[1] 0.5
[[4]]
[1] TRUE
print(str(l)) # structure of l
List of 4
$ : chr "a"
$ : num 1
$ : num 0.5
$ : logi TRUE
NULL
Lists
Lists can also be nested.
l1 <- list("a", 1, 0.5, TRUE)
l2 <- list(l1, "test", matrix(1:9, nrow = 3)) ## first element of l2 is a list.
print(l2)
[[1]]
[[1]][[1]]
[1] "a"
[[1]][[2]]
[1] 1
[[1]][[3]]
[1] 0.5
[[1]][[4]]
[1] TRUE
[[2]]
[1] "test"
[[3]]
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
Question: what is the length of l1? what is the length of each element of l1? (use length
function)
Accessing elements in lists
Use double brackets [[ ]]
to access elements in lists.
l1 <- list("a", 1, 0.5, TRUE)
l2 <- list(l1, "test", matrix(1:9, nrow = 3)) ## first element of l2 is a list.
print(l2[[1]]) ## first element: the nested list l1
print(l2[[3]]) ## third element: the 3 x 3 matrix
[[1]]
[1] "a"
[[2]]
[1] 1
[[3]]
[1] 0.5
[[4]]
[1] TRUE
[,1] [,2] [,3]
[1,] 1 4 7
[2,] 2 5 8
[3,] 3 6 9
Question: The first element of l2
is a list. Access this first element from l2
, then the 4th element from l1
Data frames
We will use data frames A LOT. Data frames are really tables, but they are stored in R as lists, with the following conditions.
- Each element in the list represents a column.
- Each element in the list is an atomic vector.
- The length of each vector is the same (the number of rows)
m1 <- cbind(1:3, letters[1:3])
print(m1) # a matrix cannot hold multiple data types.
[,1] [,2]
[1,] "1" "a"
[2,] "2" "b"
[3,] "3" "c"
df <- data.frame(numbers = 1:3, abc = letters[1:3])
print(df) # a data frame CAN hold multiple data types.
numbers abc
1 1 a
2 2 b
3 3 c
Useful functions
print
- You can print just about anything, but as a generic function,
print
works differently for different types of objects.
a <- "test"
df <- data.frame(numbers = 1:3, abc = letters[1:3])
print(a) # a character vector prints as a quoted string
print(df) # a data frame prints as a table (dispatches to print.data.frame)
numbers abc
1 1 a
2 2 b
3 3 c
methods(print) ## all the different ways to print...
[1] print.acf*
[2] print.activeConcordance*
[3] print.AES*
[4] print.anova*
[5] print.aov*
[6] print.aovlist*
[7] print.ar*
[8] print.Arima*
[9] print.arima0*
[10] print.AsIs
[11] print.aspell*
[12] print.aspell_inspect_context*
[13] print.bibentry*
[14] print.Bibtex*
[15] print.browseVignettes*
[16] print.by
[17] print.changedFiles*
[18] print.check_bogus_return*
[19] print.check_code_usage_in_package*
[20] print.check_compiled_code*
[21] print.check_demo_index*
[22] print.check_depdef*
[23] print.check_details*
[24] print.check_details_changes*
[25] print.check_doi_db*
[26] print.check_dotInternal*
[27] print.check_make_vars*
[28] print.check_nonAPI_calls*
[29] print.check_package_code_assign_to_globalenv*
[30] print.check_package_code_attach*
[31] print.check_package_code_data_into_globalenv*
[32] print.check_package_code_startup_functions*
[33] print.check_package_code_syntax*
[34] print.check_package_code_unload_functions*
[35] print.check_package_compact_datasets*
[36] print.check_package_CRAN_incoming*
[37] print.check_package_datalist*
[38] print.check_package_datasets*
[39] print.check_package_depends*
[40] print.check_package_description*
[41] print.check_package_description_encoding*
[42] print.check_package_license*
[43] print.check_packages_in_dir*
[44] print.check_packages_used*
[45] print.check_po_files*
[46] print.check_pragmas*
[47] print.check_Rd_line_widths*
[48] print.check_Rd_metadata*
[49] print.check_Rd_xrefs*
[50] print.check_RegSym_calls*
[51] print.check_S3_methods_needing_delayed_registration*
[52] print.check_so_symbols*
[53] print.check_T_and_F*
[54] print.check_url_db*
[55] print.check_vignette_index*
[56] print.checkDocFiles*
[57] print.checkDocStyle*
[58] print.checkFF*
[59] print.checkRd*
[60] print.checkRdContents*
[61] print.checkReplaceFuns*
[62] print.checkS3methods*
[63] print.checkTnF*
[64] print.checkVignettes*
[65] print.citation*
[66] print.cli_ansi_html_style*
[67] print.cli_ansi_string*
[68] print.cli_ansi_style*
[69] print.cli_boxx*
[70] print.cli_diff_chr*
[71] print.cli_doc*
[72] print.cli_progress_demo*
[73] print.cli_rule*
[74] print.cli_sitrep*
[75] print.cli_spark*
[76] print.cli_spinner*
[77] print.cli_tree*
[78] print.codoc*
[79] print.codocClasses*
[80] print.codocData*
[81] print.colorConverter*
[82] print.compactPDF*
[83] print.condition
[84] print.connection
[85] print.CRAN_package_reverse_dependencies_and_views*
[86] print.data.frame
[87] print.Date
[88] print.default
[89] print.dendrogram*
[90] print.density*
[91] print.difftime
[92] print.dist*
[93] print.Dlist
[94] print.DLLInfo
[95] print.DLLInfoList
[96] print.DLLRegisteredRoutines
[97] print.document_context*
[98] print.document_position*
[99] print.document_range*
[100] print.document_selection*
[101] print.dummy_coef*
[102] print.dummy_coef_list*
[103] print.ecdf*
[104] print.eigen
[105] print.evaluate_evaluation*
[106] print.factanal*
[107] print.factor
[108] print.family*
[109] print.fileSnapshot*
[110] print.findLineNumResult*
[111] print.formula*
[112] print.ftable*
[113] print.function
[114] print.getAnywhere*
[115] print.glm*
[116] print.hashtab*
[117] print.hclust*
[118] print.help_files_with_topic*
[119] print.hexmode
[120] print.HoltWinters*
[121] print.hsearch*
[122] print.hsearch_db*
[123] print.htest*
[124] print.html*
[125] print.html_dependency*
[126] print.htmltools.selector*
[127] print.htmltools.selector.list*
[128] print.infl*
[129] print.integrate*
[130] print.isoreg*
[131] print.json*
[132] print.key_missing*
[133] print.kmeans*
[134] print.knitr_kable*
[135] print.Latex*
[136] print.LaTeX*
[137] print.libraryIQR
[138] print.listof
[139] print.lm*
[140] print.loadings*
[141] print.loess*
[142] print.logLik*
[143] print.ls_str*
[144] print.medpolish*
[145] print.MethodsFunction*
[146] print.mtable*
[147] print.NativeRoutineList
[148] print.news_db*
[149] print.nls*
[150] print.noquote
[151] print.numeric_version
[152] print.object_size*
[153] print.octmode
[154] print.packageDescription*
[155] print.packageInfo
[156] print.packageIQR*
[157] print.packageStatus*
[158] print.paged_df*
[159] print.pairwise.htest*
[160] print.person*
[161] print.POSIXct
[162] print.POSIXlt
[163] print.power.htest*
[164] print.ppr*
[165] print.prcomp*
[166] print.princomp*
[167] print.proc_time
[168] print.quosure*
[169] print.quosures*
[170] print.raster*
[171] print.Rconcordance*
[172] print.Rd*
[173] print.recordedplot*
[174] print.restart
[175] print.RGBcolorConverter*
[176] print.RGlyphFont*
[177] print.rlang_box_done*
[178] print.rlang_box_splice*
[179] print.rlang_data_pronoun*
[180] print.rlang_dict*
[181] print.rlang_dyn_array*
[182] print.rlang_envs*
[183] print.rlang_error*
[184] print.rlang_fake_data_pronoun*
[185] print.rlang_lambda_function*
[186] print.rlang_message*
[187] print.rlang_trace*
[188] print.rlang_warning*
[189] print.rlang_zap*
[190] print.rlang:::list_of_conditions*
[191] print.rle
[192] print.rlib_bytes*
[193] print.rlib_error_3_0*
[194] print.rlib_trace_3_0*
[195] print.roman*
[196] print.scalar*
[197] print.sessionInfo*
[198] print.shiny.tag*
[199] print.shiny.tag.env*
[200] print.shiny.tag.list*
[201] print.shiny.tag.query*
[202] print.simple.list
[203] print.smooth.spline*
[204] print.socket*
[205] print.srcfile
[206] print.srcref
[207] print.stepfun*
[208] print.stl*
[209] print.StructTS*
[210] print.subdir_tests*
[211] print.summarize_CRAN_check_status*
[212] print.summary.aov*
[213] print.summary.aovlist*
[214] print.summary.ecdf*
[215] print.summary.glm*
[216] print.summary.lm*
[217] print.summary.loess*
[218] print.summary.manova*
[219] print.summary.nls*
[220] print.summary.packageStatus*
[221] print.summary.ppr*
[222] print.summary.prcomp*
[223] print.summary.princomp*
[224] print.summary.table
[225] print.summary.warnings
[226] print.summaryDefault
[227] print.table
[228] print.tables_aov*
[229] print.terms*
[230] print.ts*
[231] print.tskernel*
[232] print.TukeyHSD*
[233] print.tukeyline*
[234] print.tukeysmooth*
[235] print.undoc*
[236] print.vignette*
[237] print.warnings
[238] print.xfun_md_viewable*
[239] print.xfun_raw_string*
[240] print.xfun_record_results*
[241] print.xfun_rename_seq*
[242] print.xfun_strict_list*
[243] print.xgettext*
[244] print.xngettext*
[245] print.xtabs*
see '?methods' for accessing help and source code
Examining objects
a <- "test"
df <- data.frame(numbers = 1:3, abc = letters[1:3])
ls() ## list objects in the global environment
str(df) ## compact summary of the structure of df
[1] "a" "df" "l" "l1" "l2" "m1" "m2" "v1"
'data.frame': 3 obs. of 2 variables:
$ numbers: int 1 2 3
$ abc : chr "a" "b" "c"
Sampling
- Sample without replacement
set.seed(1234) ## set a random seed so the draw is reproducible
v <- 1:100
s <- sample(v, 50) ## 50 values drawn without replacement (the default)
print(s)
print(sort(s)) ## sorted, which makes it easy to see there are no repeats
[1] 28 80 22 9 5 38 16 4 86 90 70 79 78 14 56 62 93 84 21 40 92 67 96 66 47
[26] 81 48 3 41 32 42 43 2 54 49 99 51 6 77 29 71 85 57 8 26 17 58 91 60 76
[1] 2 3 4 5 6 8 9 14 16 17 21 22 26 28 29 32 38 40 41 42 43 47 48 49 51
[26] 54 56 57 58 60 62 66 67 70 71 76 77 78 79 80 81 84 85 86 90 91 92 93 96 99
s2 <- sample(v, 50, replace = TRUE) ## with replacement, so values can repeat; write TRUE, not T
print(sort(s2))
[1] 2 3 3 6 6 9 10 17 17 19 20 22 22 25 27 28 30 32 35 36 41 41 48 50 51
[26] 55 57 58 58 60 61 63 63 65 66 70 70 71 72 76 77 80 83 85 85 86 86 87 90 96
Does s2 have duplicates?
s2_unique <- unique(s2)
print(length(s2))
print(length(s2) == length(s2_unique))
In-class exercise
- Create the following:
a
: a random vector of integers with 10 elements drawn from 1-20:
- Use the
sample
function with set.seed(10)
- Name the elements of
a
with a vector of names starting with “V1” and ending with “V10”.
- Use the
paste0
function to create those names.
- Create the identical vector of names using the
paste
function.
b
: Using a
as an index to select from letters
d
: Use rnorm
with a mean = 100 and an sd of 20
- Use
?rnorm
to see the help guide.
- Why did I skip
c
?
- Create a list
l
from a
, b
, d
.
- Assign the names of the vectors in
l
to the l
’s elements
2-d structures
- Create the following:
m
: a matrix with three integer columns named “V1”, “V2”, “V3”
- Create each column first as its own vector, then combine
V1
= 1:10
V2
is a random sample between 1:100
V3
is drawn from a random uniform distribution between 0 and 50 - Use the same set.seed(50)
as before
- Inspect the
str
and class
of m
dat
, a data.frame built from V1
, V2
, V3
, and V4
V4
is a random selection of the letters A-E
Functions
Components
# Template for a function definition.
# arg1 has no default, arg2 defaults to 1:10, and arg3's default is computed from arg2.
function_name <- function(arg1, arg2 = 1:10,
arg3 = ifelse(arg2 == 2, TRUE, FALSE)) {
body # placeholder for the function's code
}
Three components of a function:
formals()
: arguments
body()
, the code, which returns the last object generated, unless specified with return(x)
.
environment()
, which determines where the function finds the values of its variables
Unnamed functions are anonymous functions. (Used in *apply
)
Using x
in a function does not change its global value.
x <- 1:10
# myfun takes no arguments, so x is looked up outside the function (the global x here).
myfun <- function() {
x * 10 # the product is returned; x itself is not modified
}
myfun()
[1] 10 20 30 40 50 60 70 80 90 100
# Here x is an argument: the assignment inside changes only the function's local x.
myfun <- function(x) {
x <- x * 10
return(x)
}
x <- 10
myfun(x = 20) # returns 200; the global x is still 10
Each time you run myfun
, a new function environment is created.
# Print the environment created for this call, then return x * 10.
myfun <- function(x) {
  x <- x * 10
  print(environment()) # the environment created for this particular call
  return(x)
}
myfun(x)
myfun(x) # a second call runs in a new environment, so a new address is printed
<environment: 0x10f0b2438>
<environment: 0x10f0eb7e8>
Global assignment.
Use <<-
to change value of global variable within a function.
a <- 10
# <<- assigns to an existing a outside the function (here the global a) instead of creating a local one.
myfun <- function(x) {
a <<- x * 10 ## note <<- instead of <-
return(a)
}
myfun(5) # returns 50, and the global a is now 50 as well
Useful functions
which
finds indices where a condition is true.
a <- which(v %% 3 == 0) ## indices of the elements of v that are divisible by 3
print(a) ## shows indices where condition is true.
Useful functions
which.min
finds index of min value
v <- sample(1:20, 10)
print(v)
[1] 16 11 4 3 10 18 5 20 6 12
print(which.min(v)) # index of min value
print(which.max(v)) # index of max value
data.frame vs data.table vs. tibble
- all 2D structures.
- data.frame = Base R
- tibble =
tidyverse
- data.table = fast.
For now, we’ll stick to data.frame
data.frame indexing
data.frame
uses the following to subset: [*row conditions, *column conditions]
df <- data.frame(v1 = 1:5, v2 = 6:10)
rownames(df) <- LETTERS[1:5]
print(df)
v1 v2
A 1 6
B 2 7
C 3 8
D 4 9
E 5 10
data.frame indexing
- Index using names.
- Empty index
[ , 'v2']
means “keep all rows”
df[,'v2'] ## column indexing
df[c("A", "B", "D"), ] ## row indexing
data.frame subset
df[df$v1 > 3, ] ## get observations (rows) where first column is larger than 3
Control structures
Branching
- Pay attention to
{ }
placement
a <- 5
if(a > 10) {
print("Greater than 10!")
} else {
print("Less than or equal to 10")
}
[1] "Less than or equal to 10"
Looping
b <- 1:3
for(i in b) print(i)
b <- 1:5
a <- 2
for(i in b){
a <- 2 * a
print(a)
}
[1] 4
[1] 8
[1] 16
[1] 32
[1] 64
*apply
- A special form of looping
- Intended for applying a function to data. Uses anonymous function.
- 3 main kinds:
sapply
, lapply
, apply
sapply
sapply
iterates over input and returns a vector.
v <- 1:10
sapply(v, function(x) x + 10) ## adds 10 to each element in v.
[1] 11 12 13 14 15 16 17 18 19 20
Use { }
for more complicated functions. BUT be careful with order of { }
, ( )
v1 <- 1:10
v2 <- sapply(v1, function(x){
y <- x^2
return(y)
}) #
print(v2)
[1] 1 4 9 16 25 36 49 64 81 100
sapply
If you don’t specify return
, the last object created will be returned.
v1 <- 1:10
v2 <- sapply(v1, function(x){
y <- x^2 ## y will be returned
}) #
print(v2)
[1] 1 4 9 16 25 36 49 64 81 100
lapply
- Similar to
sapply
, except final object is returned as list
.
- Useful if you need to store more complex objects (data.frame, plot, raster etc.)
v1 <- 1:10
v2 <- lapply(v1, function(x){
y <- x^2 ## y will be returned
}) #
print(v2)
[[1]]
[1] 1
[[2]]
[1] 4
[[3]]
[1] 9
[[4]]
[1] 16
[[5]]
[1] 25
[[6]]
[1] 36
[[7]]
[1] 49
[[8]]
[1] 64
[[9]]
[1] 81
[[10]]
[1] 100
apply
apply
works well for 2D data, when you want to apply function over a row or column.
v1 <- sample(1:100, 10)
v2 <- sample(1:100, 10)
DF <- data.frame(v1, v2) ## data frame columns will take names of vectors
DF
v1 v2
1 9 38
2 43 88
3 19 30
4 22 70
5 46 94
6 40 19
7 29 79
8 57 86
9 16 14
10 66 23
Use apply
to get column max value. The index 2 means “apply function to columns”.
colMax <- apply(DF, 2, FUN = max)
colMax
Use apply
to get row max value. The index 1 means “apply function to rows”.
rowMax <- apply(DF, 1, FUN = max)
rowMax
[1] 38 88 30 70 94 40 79 86 16 66
We can use apply
or sapply
to create a new column in a data frame.
DF$rowMax <- apply(DF, 1, FUN = max)
DF
v1 v2 rowMax
1 9 38 38
2 43 88 88
3 19 30 30
4 22 70 70
5 46 94 94
6 40 19 40
7 29 79 79
8 57 86 86
9 16 14 16
10 66 23 66