Creating and manipulating attributes for spatial data

2023-09-20

Error in `loadNamespace()`:
! there is no package called 'here'

1 Setup

Today, we are going to use the tidyverse along with sf and two related packages: lwgeom and units.

library(tidyverse)
library(sf)
library(lwgeom)
library(units)

2 Getting started

For this session, we also need some data to look at. We are going to load data from the U.S. Census Bureau using the tigris package:

library(tigris)
options(tigris_use_cache = TRUE)

Use states() to load data on U.S. states and primary_roads() to load data on U.S. highways:

us_states <- states()

us_highways <- primary_roads()

3 Things to remember about spatial data

  • Fields
  • Objects

Check out the Wikipedia article on data models in GIS for more background on this topic.

4 Things to remember about sf and sfc objects

A sf object is a data frame with a sfc list-column.

us_states$geometry
Geometry set for 56 features 
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
Geodetic CRS:  NAD83
First 5 geometries:

5 Things to remember about sf and sfc objects

  • a sf object is a data frame with a sfc list-column
  • a sf object has a sf_column attribute (it isn’t always named geometry—use attributes() to take a look)
attributes(us_states)
$names
 [1] "REGION"   "DIVISION" "STATEFP"  "STATENS"  "GEOID"    "GEOIDFQ" 
 [7] "STUSPS"   "NAME"     "LSAD"     "MTFCC"    "FUNCSTAT" "ALAND"   
[13] "AWATER"   "INTPTLAT" "INTPTLON" "geometry"

$row.names
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
[51] 51 52 53 54 55 56

$class
[1] "sf"         "data.frame"

$sf_column
[1] "geometry"

$agr
  REGION DIVISION  STATEFP  STATENS    GEOID  GEOIDFQ   STUSPS     NAME 
    <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA> 
    LSAD    MTFCC FUNCSTAT    ALAND   AWATER INTPTLAT INTPTLON 
    <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA> 
Levels: constant aggregate identity

$tigris
[1] "state"

6 Things to remember about sf and sfc objects

  • a sf object is a data frame with a sfc list-column
  • a sf object has a sf_column attribute (it isn’t always named geometry—use attributes() to take a look)
  • sf and sfc objects use a coordinate reference system
st_crs(us_states)
Coordinate Reference System:
  User input: NAD83 
  wkt:
GEOGCRS["NAD83",
    DATUM["North American Datum 1983",
        ELLIPSOID["GRS 1980",6378137,298.257222101,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["latitude",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["longitude",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4269]]
st_crs(us_states$geometry)
Coordinate Reference System:
  User input: NAD83 
  wkt:
GEOGCRS["NAD83",
    DATUM["North American Datum 1983",
        ELLIPSOID["GRS 1980",6378137,298.257222101,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["latitude",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["longitude",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4269]]

7 Things to remember about sf and sfc objects

  • a sf object is a data frame with a sfc list-column
  • a sf object has a sf_column attribute (it isn’t always named geometry—use attributes() to take a look)
  • sf and sfc objects use a coordinate reference system
  • working with sf objects is slower than working with data frames—so drop the geometry if you don’t need it
st_drop_geometry(us_states)
   REGION DIVISION STATEFP  STATENS GEOID     GEOIDFQ STUSPS
1       3        5      54 01779805    54 0400000US54     WV
2       3        5      12 00294478    12 0400000US12     FL
3       2        3      17 01779784    17 0400000US17     IL
4       2        4      27 00662849    27 0400000US27     MN
5       3        5      24 01714934    24 0400000US24     MD
6       1        1      44 01219835    44 0400000US44     RI
7       4        8      16 01779783    16 0400000US16     ID
8       1        1      33 01779794    33 0400000US33     NH
9       3        5      37 01027616    37 0400000US37     NC
10      1        1      50 01779802    50 0400000US50     VT
11      1        1      09 01779780    09 0400000US09     CT
12      3        5      10 01779781    10 0400000US10     DE
13      4        8      35 00897535    35 0400000US35     NM
14      4        9      06 01779778    06 0400000US06     CA
15      1        2      34 01779795    34 0400000US34     NJ
16      2        3      55 01779806    55 0400000US55     WI
17      4        9      41 01155107    41 0400000US41     OR
18      2        4      31 01779792    31 0400000US31     NE
19      1        2      42 01779798    42 0400000US42     PA
20      4        9      53 01779804    53 0400000US53     WA
21      3        7      22 01629543    22 0400000US22     LA
22      3        5      13 01705317    13 0400000US13     GA
23      3        6      01 01779775    01 0400000US01     AL
24      4        8      49 01455989    49 0400000US49     UT
25      2        3      39 01085497    39 0400000US39     OH
26      3        7      48 01779801    48 0400000US48     TX
27      4        8      08 01779779    08 0400000US08     CO
28      3        5      45 01779799    45 0400000US45     SC
29      3        7      40 01102857    40 0400000US40     OK
30      3        6      47 01325873    47 0400000US47     TN
31      4        8      56 01779807    56 0400000US56     WY
32      4        9      15 01779782    15 0400000US15     HI
33      2        4      38 01779797    38 0400000US38     ND
34      3        6      21 01779786    21 0400000US21     KY
35      9        0      78 01802710    78 0400000US78     VI
36      9        0      69 01779809    69 0400000US69     MP
37      9        0      66 01802705    66 0400000US66     GU
38      1        1      23 01779787    23 0400000US23     ME
39      1        2      36 01779796    36 0400000US36     NY
40      4        8      32 01779793    32 0400000US32     NV
41      4        9      02 01785533    02 0400000US02     AK
42      9        0      60 01802701    60 0400000US60     AS
43      2        3      26 01779789    26 0400000US26     MI
44      3        7      05 00068085    05 0400000US05     AR
45      3        6      28 01779790    28 0400000US28     MS
46      2        4      29 01779791    29 0400000US29     MO
47      4        8      30 00767982    30 0400000US30     MT
48      2        4      20 00481813    20 0400000US20     KS
49      2        3      18 00448508    18 0400000US18     IN
50      9        0      72 01779808    72 0400000US72     PR
51      2        4      46 01785534    46 0400000US46     SD
52      1        1      25 00606926    25 0400000US25     MA
53      3        5      51 01779803    51 0400000US51     VA
54      3        5      11 01702382    11 0400000US11     DC
55      2        4      19 01779785    19 0400000US19     IA
56      4        8      04 01779777    04 0400000US04     AZ
                                           NAME LSAD MTFCC FUNCSTAT
1                                 West Virginia   00 G4000        A
2                                       Florida   00 G4000        A
3                                      Illinois   00 G4000        A
4                                     Minnesota   00 G4000        A
5                                      Maryland   00 G4000        A
6                                  Rhode Island   00 G4000        A
7                                         Idaho   00 G4000        A
8                                 New Hampshire   00 G4000        A
9                                North Carolina   00 G4000        A
10                                      Vermont   00 G4000        A
11                                  Connecticut   00 G4000        A
12                                     Delaware   00 G4000        A
13                                   New Mexico   00 G4000        A
14                                   California   00 G4000        A
15                                   New Jersey   00 G4000        A
16                                    Wisconsin   00 G4000        A
17                                       Oregon   00 G4000        A
18                                     Nebraska   00 G4000        A
19                                 Pennsylvania   00 G4000        A
20                                   Washington   00 G4000        A
21                                    Louisiana   00 G4000        A
22                                      Georgia   00 G4000        A
23                                      Alabama   00 G4000        A
24                                         Utah   00 G4000        A
25                                         Ohio   00 G4000        A
26                                        Texas   00 G4000        A
27                                     Colorado   00 G4000        A
28                               South Carolina   00 G4000        A
29                                     Oklahoma   00 G4000        A
30                                    Tennessee   00 G4000        A
31                                      Wyoming   00 G4000        A
32                                       Hawaii   00 G4000        A
33                                 North Dakota   00 G4000        A
34                                     Kentucky   00 G4000        A
35                 United States Virgin Islands   00 G4000        A
36 Commonwealth of the Northern Mariana Islands   00 G4000        A
37                                         Guam   00 G4000        A
38                                        Maine   00 G4000        A
39                                     New York   00 G4000        A
40                                       Nevada   00 G4000        A
41                                       Alaska   00 G4000        A
42                               American Samoa   00 G4000        A
43                                     Michigan   00 G4000        A
44                                     Arkansas   00 G4000        A
45                                  Mississippi   00 G4000        A
46                                     Missouri   00 G4000        A
47                                      Montana   00 G4000        A
48                                       Kansas   00 G4000        A
49                                      Indiana   00 G4000        A
50                                  Puerto Rico   00 G4000        A
51                                 South Dakota   00 G4000        A
52                                Massachusetts   00 G4000        A
53                                     Virginia   00 G4000        A
54                         District of Columbia   00 G4000        A
55                                         Iowa   00 G4000        A
56                                      Arizona   00 G4000        A
          ALAND       AWATER    INTPTLAT     INTPTLON
1  6.226651e+10    488918898 +38.6472854 -080.6183274
2  1.389654e+11  45968913048 +28.3989775 -082.5143005
3  1.437782e+11   6216848695 +40.1028754 -089.1526108
4  2.062448e+11  18937236061 +46.3159573 -094.1996043
5  2.515122e+10   6979843236 +38.9466584 -076.6744939
6  2.677769e+09   1323681453 +41.5964850 -071.5264901
7  2.140505e+11   2390996667 +44.3484222 -114.5588538
8  2.319021e+10   1025871482 +43.6727945 -071.5841886
9  1.259360e+11  13453455061 +35.5397100 -079.1308636
10 2.387259e+10   1030642813 +44.0589536 -072.6710173
11 1.254200e+10   1816115183 +41.5798637 -072.7466572
12 5.046692e+09   1399219008 +38.9985661 -075.4416440
13 3.141985e+11    726531289 +34.4346843 -106.1316181
14 4.036734e+11  20291632828 +37.1551773 -119.5434183
15 1.904923e+10   3533374440 +40.1072744 -074.6652012
16 1.402946e+11  29341156602 +44.6309071 -089.7093916
17 2.486304e+11   6168761370 +43.9717125 -120.6229578
18 1.989500e+11   1378956981 +41.5433053 -099.8118646
19 1.158815e+11   3397613881 +40.9046042 -077.8275233
20 1.721188e+11  12549101417 +47.4073238 -120.5757999
21 1.119209e+11  23730773357 +30.7083190 -091.6046207
22 1.494858e+11   4419221858 +32.6295789 -083.4235109
23 1.311856e+11   4581813708 +32.7395785 -086.8434469
24 2.139219e+11   5963163952 +39.3349925 -111.6563326
25 1.058241e+11  10274225585 +40.4149297 -082.7119975
26 6.766567e+11  19011620342 +31.4347032 -099.2818238
27 2.684190e+11   1185541418 +38.9937669 -105.5087122
28 7.786566e+10   5074780894 +33.8741776 -080.8542639
29 1.776646e+11   3373250292 +35.5900815 -097.4867789
30 1.067705e+11   2345359962 +35.8584600 -086.3496339
31 2.514582e+11   1868025485 +42.9896591 -107.5443922
32 1.663442e+10  11777375352 +19.8281671 -155.4950421
33 1.786943e+11   4414770689 +47.4421740 -100.4608258
34 1.022668e+11   2384136185 +37.5336844 -085.2929801
35 3.480219e+08   1550236187 +18.3392359 -064.9500433
36 4.722925e+08   4644252458 +15.0010865 +145.6181702
37 5.435558e+08    934337453 +13.4382885 +144.7729494
38 7.988840e+10  11745065493 +45.4092843 -068.6666160
39 1.220492e+11  19256755462 +42.9133974 -075.5962723
40 2.845370e+11   1839880833 +39.3310928 -116.6151469
41 1.479509e+12 244710526650 +63.3473560 -152.8397334
42 1.977591e+08   1307243751 -14.2671590 -170.6682674
43 1.466217e+11 103864424446 +44.8441768 -085.6604907
44 1.346585e+11   3122715710 +34.8955256 -092.4446262
45 1.215341e+11   3914302814 +32.6864714 -089.6561377
46 1.780524e+11   2487375487 +38.3507500 -092.4567826
47 3.769734e+11   3866958878 +47.0511770 -109.6348174
48 2.117538e+11   1345663763 +38.4985464 -098.3834298
49 9.278669e+10   1543916881 +39.9013136 -086.2919129
50 8.869520e+09   4921758891 +18.2176480 -066.4107992
51 1.963417e+11   3387563375 +44.4467957 -100.2381762
52 2.020440e+10   7130649971 +42.1565196 -071.4895915
53 1.022563e+11   8529908321 +37.5222512 -078.6681938
54 1.583162e+08     18709785 +38.9042474 -077.0165167
55 1.446603e+11   1085343719 +42.0700243 -093.4933473
56 2.943661e+11    853991999 +34.2039362 -111.6063449

8 Things to remember about coordinate reference systems

  • objects must share the same coordinate reference system if you are using them together
  • coordinate reference systems are stored as attributes for sfc and sf objects (sfg objects don’t have a CRS)
  • coordinate reference systems have units
  • geographic and projected coordinate reference systems are not the same
  • coordinate reference systems can be missing and they can be wrong
# bad: assigning a CRS that doesn't match the geometry
st_crs(us_states) <- 3857

# good: using st_transform to convert the geometry to a new CRS
st_transform(us_states, crs = 3857)

Get the objects into the same CRS before we continue:

us_states <- st_transform(us_states, crs = 3857)

us_highways <- st_transform(us_highways, crs = 3857)

9 Things to think about

  • sf objects are not the only way to represent spatial data in R

  • If you are working with more than one sf or sfc, the objects must use the same coordinate reference system to use them together.

# state

10 What is an “attribute” in GIS?

Within a GIS desktop application, an attribute may be known as a field.

When we talk about tidy data frames, an attribute is equivalent to a variable which is represented as a column in a data frame.

But! Feature geometry is also stored as a column.

A single sfc object (or list-column) can contain more than one feature (shapefiles can’t do this BTW!) using MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, or GEOMETRYCOLLECTION geometry types.

And! Objects in R can also have attributes and these are not the same thing as attributes for spatial data. Try not to get them mixed up.

11 What is an attribute?

So (to recap) an attribute in GIS can also be called a…

  • field in a desktop GIS application
  • variable in tidy data
  • column in a data frame

12 What types of attributes exist?

Attributes are “properties of features (‘things’) that do not describe the feature’s geometry”.

Here are the attributes for us_states:

glimpse(st_drop_geometry(us_states))
Rows: 56
Columns: 15
$ REGION   <chr> "3", "3", "2", "2", "3", "1", "4", "1", "3", "1", "1", "3", "…
$ DIVISION <chr> "5", "5", "3", "4", "5", "1", "8", "1", "5", "1", "1", "5", "…
$ STATEFP  <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STATENS  <chr> "01779805", "00294478", "01779784", "00662849", "01714934", "…
$ GEOID    <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ GEOIDFQ  <chr> "0400000US54", "0400000US12", "0400000US17", "0400000US27", "…
$ STUSPS   <chr> "WV", "FL", "IL", "MN", "MD", "RI", "ID", "NH", "NC", "VT", "…
$ NAME     <chr> "West Virginia", "Florida", "Illinois", "Minnesota", "Marylan…
$ LSAD     <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00", "…
$ MTFCC    <chr> "G4000", "G4000", "G4000", "G4000", "G4000", "G4000", "G4000"…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ ALAND    <dbl> 62266513826, 138965379385, 143778206717, 206244791203, 251512…
$ AWATER   <dbl> 488918898, 45968913048, 6216848695, 18937236061, 6979843236, …
$ INTPTLAT <chr> "+38.6472854", "+28.3989775", "+40.1028754", "+46.3159573", "…
$ INTPTLON <chr> "-080.6183274", "-082.5143005", "-089.1526108", "-094.1996043…

Attributes can have:

  • point support: the value applies to every point individually, or
  • block support: the value is a summary for all points in the geometry

The relationship between attributes and geometry can be described as:

  • constant: the value is valid everywhere in or over the geometry
  • aggregate: the value is associated with the entire geometry

What is an example of an attribute with a constant relationship?

glimpse(us_states)
Rows: 56
Columns: 16
$ REGION   <chr> "3", "3", "2", "2", "3", "1", "4", "1", "3", "1", "1", "3", "…
$ DIVISION <chr> "5", "5", "3", "4", "5", "1", "8", "1", "5", "1", "1", "5", "…
$ STATEFP  <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STATENS  <chr> "01779805", "00294478", "01779784", "00662849", "01714934", "…
$ GEOID    <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ GEOIDFQ  <chr> "0400000US54", "0400000US12", "0400000US17", "0400000US27", "…
$ STUSPS   <chr> "WV", "FL", "IL", "MN", "MD", "RI", "ID", "NH", "NC", "VT", "…
$ NAME     <chr> "West Virginia", "Florida", "Illinois", "Minnesota", "Marylan…
$ LSAD     <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00", "…
$ MTFCC    <chr> "G4000", "G4000", "G4000", "G4000", "G4000", "G4000", "G4000"…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ ALAND    <dbl> 62266513826, 138965379385, 143778206717, 206244791203, 251512…
$ AWATER   <dbl> 488918898, 45968913048, 6216848695, 18937236061, 6979843236, …
$ INTPTLAT <chr> "+38.6472854", "+28.3989775", "+40.1028754", "+46.3159573", "…
$ INTPTLON <chr> "-080.6183274", "-082.5143005", "-089.1526108", "-094.1996043…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((-8655578 47..., MULTIPOLYGON (((…

What is an example with an aggregate relationship?

glimpse(us_states)
Rows: 56
Columns: 16
$ REGION   <chr> "3", "3", "2", "2", "3", "1", "4", "1", "3", "1", "1", "3", "…
$ DIVISION <chr> "5", "5", "3", "4", "5", "1", "8", "1", "5", "1", "1", "5", "…
$ STATEFP  <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STATENS  <chr> "01779805", "00294478", "01779784", "00662849", "01714934", "…
$ GEOID    <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ GEOIDFQ  <chr> "0400000US54", "0400000US12", "0400000US17", "0400000US27", "…
$ STUSPS   <chr> "WV", "FL", "IL", "MN", "MD", "RI", "ID", "NH", "NC", "VT", "…
$ NAME     <chr> "West Virginia", "Florida", "Illinois", "Minnesota", "Marylan…
$ LSAD     <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00", "…
$ MTFCC    <chr> "G4000", "G4000", "G4000", "G4000", "G4000", "G4000", "G4000"…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ ALAND    <dbl> 62266513826, 138965379385, 143778206717, 206244791203, 251512…
$ AWATER   <dbl> 488918898, 45968913048, 6216848695, 18937236061, 6979843236, …
$ INTPTLAT <chr> "+38.6472854", "+28.3989775", "+40.1028754", "+46.3159573", "…
$ INTPTLON <chr> "-080.6183274", "-082.5143005", "-089.1526108", "-094.1996043…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((-8655578 47..., MULTIPOLYGON (((…

Attributes can be:

  • extensive: corresponding to amounts, associated with a physical size (length, area, volume, counts of items)
  • intensive: FIXME: fill in missing definition.

13 What is an “attribute domain”?

14 What are we working with?

When we do data analysis using dplyr, there are three types of functions we use most often:

  • Boolean operators or predicates
  • Window or vector functions
  • Summary or analysis functions

There are similar groups of functions in sf:

15 Predicate functions for geometries with {sf}

sf includes “vectorized” logical operators or tests that work with a geometry list (sf or sfc) or a single geometry (sfg) including:

  • st_is
  • st_is_valid
  • st_is_empty
st_is(us_states, "POLYGON")
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
st_is(us_states, "MULTIPOLYGON")
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

16 Predicate functions for geometries with {sf}

sf also includes more than a dozen predicate functions for working with pairs of simple geometries including:

  • st_intersects
  • st_disjoint
  • st_contains
  • st_covers
  • st_is_within_distance

17 Using predicate functions for spatial joins and filters

st_filter() and st_join() are two functions that rely on these predicate functions to work.

maryland <- filter(us_states, NAME == "Maryland")

st_filter(us_highways, maryland)
Simple feature collection with 466 features and 4 fields
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: -8881626 ymin: 4621400 xmax: -8367444 ymax: 4897471
Projected CRS: WGS 84 / Pseudo-Mercator
First 10 features:
        LINEARID      FULLNAME RTTYP MTFCC                       geometry
1  1104258795090 Salisbury Byp     M S1100 LINESTRING (-8416214 463670...
2  1103075320064 Salisbury Byp     M S1100 LINESTRING (-8414039 463644...
3  1104258795072 Salisbury Byp     M S1100 LINESTRING (-8417144 463654...
4  1104258845568 Salisbury Byp     M S1100 LINESTRING (-8413964 463644...
5   110191599447 Salisbury Byp     M S1100 LINESTRING (-8409248 463421...
6   110191599448 Salisbury Byp     M S1100 LINESTRING (-8419516 462217...
7  1102149806067 Salisbury Byp     M S1100 LINESTRING (-8412863 463617...
8  1102149813008 Salisbury Byp     M S1100 LINESTRING (-8412725 463596...
9  1102149806121 Salisbury Byp     M S1100 LINESTRING (-8412973 463624...
10 1102166635559 Salisbury Byp     M S1100 LINESTRING (-8412583 463576...
st_filter(us_states, maryland, .predicate = st_disjoint)
Simple feature collection with 50 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
First 10 features:
   REGION DIVISION STATEFP  STATENS GEOID     GEOIDFQ STUSPS           NAME
1       3        5      12 00294478    12 0400000US12     FL        Florida
2       2        3      17 01779784    17 0400000US17     IL       Illinois
3       2        4      27 00662849    27 0400000US27     MN      Minnesota
4       1        1      44 01219835    44 0400000US44     RI   Rhode Island
5       4        8      16 01779783    16 0400000US16     ID          Idaho
6       1        1      33 01779794    33 0400000US33     NH  New Hampshire
7       3        5      37 01027616    37 0400000US37     NC North Carolina
8       1        1      50 01779802    50 0400000US50     VT        Vermont
9       1        1      09 01779780    09 0400000US09     CT    Connecticut
10      4        8      35 00897535    35 0400000US35     NM     New Mexico
   LSAD MTFCC FUNCSTAT        ALAND      AWATER    INTPTLAT     INTPTLON
1    00 G4000        A 138965379385 45968913048 +28.3989775 -082.5143005
2    00 G4000        A 143778206717  6216848695 +40.1028754 -089.1526108
3    00 G4000        A 206244791203 18937236061 +46.3159573 -094.1996043
4    00 G4000        A   2677768885  1323681453 +41.5964850 -071.5264901
5    00 G4000        A 214050504522  2390996667 +44.3484222 -114.5588538
6    00 G4000        A  23190211616  1025871482 +43.6727945 -071.5841886
7    00 G4000        A 125935965771 13453455061 +35.5397100 -079.1308636
8    00 G4000        A  23872594714  1030642813 +44.0589536 -072.6710173
9    00 G4000        A  12541999507  1816115183 +41.5798637 -072.7466572
10   00 G4000        A 314198519809   726531289 +34.4346843 -106.1316181
                         geometry
1  MULTIPOLYGON (((-9251622 28...
2  MULTIPOLYGON (((-9784141 46...
3  MULTIPOLYGON (((-10610964 6...
4  MULTIPOLYGON (((-7979248 50...
5  MULTIPOLYGON (((-12952962 6...
6  MULTIPOLYGON (((-7885747 53...
7  MULTIPOLYGON (((-8671763 43...
8  MULTIPOLYGON (((-8019664 54...
9  MULTIPOLYGON (((-8073769 50...
10 MULTIPOLYGON (((-11473113 3...

18 Creating new variables with geometry

  • Measuring feature geometries
  • Comparing feature geometries
  • Joining data based on feature geometries
maryland <- filter(us_states, NAME == "Maryland")

19 Measuring geometries with {sf}

sf includes a few different functions for measuring geometries:

  • st_area() (only works with POLYGON and MULTIPOLYGON geometries)
  • st_length() (only wrks with LINESTRING and MULTILINSTRING geometries)
  • st_distance() (requires a pair of objects)

20 Measuring geometries with {sf}

All of these functions are vectorized meaning that they can operate independently on each feature in a sf or sfc object.

They support both sf inputs (data frames) and sfc inputs (lists)—but they always return a vector:

st_area(maryland)
53204014051 [m^2]
st_area(maryland$geometry)
53204014051 [m^2]

21 Measuring geometries with {sf}

But, remember, dplyr::mutate() is designed to work with vectorized functions so you can use a measurement function inside mutate():

maryland |>
  mutate(
    area = st_area(geometry)
  )
Simple feature collection with 1 feature and 16 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -8848525 ymin: 4563419 xmax: -8347435 ymax: 4825776
Projected CRS: WGS 84 / Pseudo-Mercator
  REGION DIVISION STATEFP  STATENS GEOID     GEOIDFQ STUSPS     NAME LSAD MTFCC
1      3        5      24 01714934    24 0400000US24     MD Maryland   00 G4000
  FUNCSTAT       ALAND     AWATER    INTPTLAT     INTPTLON
1        A 25151223822 6979843236 +38.9466584 -076.6744939
                        geometry              area
1 MULTIPOLYGON (((-8433120 47... 53204014051 [m^2]

22 Measuring geometries with {sf}

You can even work with multiple geometries using this same approach:

us_states |>
  mutate(
    area = st_area(geometry),
    distance_to_maryland = st_distance(geometry, maryland)
  )
Simple feature collection with 56 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
First 10 features:
   REGION DIVISION STATEFP  STATENS GEOID     GEOIDFQ STUSPS           NAME
1       3        5      54 01779805    54 0400000US54     WV  West Virginia
2       3        5      12 00294478    12 0400000US12     FL        Florida
3       2        3      17 01779784    17 0400000US17     IL       Illinois
4       2        4      27 00662849    27 0400000US27     MN      Minnesota
5       3        5      24 01714934    24 0400000US24     MD       Maryland
6       1        1      44 01219835    44 0400000US44     RI   Rhode Island
7       4        8      16 01779783    16 0400000US16     ID          Idaho
8       1        1      33 01779794    33 0400000US33     NH  New Hampshire
9       3        5      37 01027616    37 0400000US37     NC North Carolina
10      1        1      50 01779802    50 0400000US50     VT        Vermont
   LSAD MTFCC FUNCSTAT        ALAND      AWATER    INTPTLAT     INTPTLON
1    00 G4000        A  62266513826   488918898 +38.6472854 -080.6183274
2    00 G4000        A 138965379385 45968913048 +28.3989775 -082.5143005
3    00 G4000        A 143778206717  6216848695 +40.1028754 -089.1526108
4    00 G4000        A 206244791203 18937236061 +46.3159573 -094.1996043
5    00 G4000        A  25151223822  6979843236 +38.9466584 -076.6744939
6    00 G4000        A   2677768885  1323681453 +41.5964850 -071.5264901
7    00 G4000        A 214050504522  2390996667 +44.3484222 -114.5588538
8    00 G4000        A  23190211616  1025871482 +43.6727945 -071.5841886
9    00 G4000        A 125935965771 13453455061 +35.5397100 -079.1308636
10   00 G4000        A  23872594714  1030642813 +44.0589536 -072.6710173
                         geometry               area distance_to_maryland
1  MULTIPOLYGON (((-8655578 47... 103045820058 [m^2]              0.0 [m]
2  MULTIPOLYGON (((-9251622 28... 240290408316 [m^2]        1122737.0 [m]
3  MULTIPOLYGON (((-9784141 46... 257089838407 [m^2]         893538.7 [m]
4  MULTIPOLYGON (((-10610964 6... 473223871919 [m^2]        1423100.6 [m]
5  MULTIPOLYGON (((-8433120 47...  53204014051 [m^2]              0.0 [m]
6  MULTIPOLYGON (((-7979248 50...   7160497121 [m^2]         490274.7 [m]
7  MULTIPOLYGON (((-12952962 6... 424675068385 [m^2]        3530363.1 [m]
8  MULTIPOLYGON (((-7885747 53...  46326397472 [m^2]         579065.3 [m]
9  MULTIPOLYGON (((-8671763 43... 210941928127 [m^2]         186785.1 [m]
10 MULTIPOLYGON (((-8019664 54...  48271748241 [m^2]         527897.6 [m]

This works to aggregate features by division:

us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME)
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
# A tibble: 10 × 3
   DIVISION n_states                                                    geometry
   <chr>       <int>                                              <GEOMETRY [m]>
 1 0               5 MULTIPOLYGON (((-18726019 -1633065, -18725998 -1633025, -1…
 2 1               6 MULTIPOLYGON (((-7978692 5038758, -7978581 5039616, -79783…
 3 2               3 POLYGON ((-8290777 4763109, -8291389 4762454, -8291849 476…
 4 3               5 POLYGON ((-9799335 4551032, -9799438 4550965, -9799539 455…
 5 4               7 POLYGON ((-10176807 4921011, -10176847 4920961, -10177047 …
 6 5               9 MULTIPOLYGON (((-9250560 2835330, -9249947 2836819, -92493…
 7 6               4 POLYGON ((-9840190 3523448, -9841106 3523126, -9842801 352…
 8 7               4 POLYGON ((-10148561 3895849, -10148557 3895720, -10148574 …
 9 8               8 POLYGON ((-11473113 3798919, -11473113 3798905, -11473113 …
10 9               5 MULTIPOLYGON (((-19000283 2939831, -19000200 2940106, -190…
us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME)
  ) |>
  ggplot() +
  geom_sf(aes(fill = DIVISION)) +
  theme_minimal()

This doesn’t entirely work (yet):

us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME) # ,
    # .by = DIVISION
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
# A tibble: 10 × 3
   DIVISION n_states                                                    geometry
   <chr>       <int>                                              <GEOMETRY [m]>
 1 0               5 MULTIPOLYGON (((-18726019 -1633065, -18725998 -1633025, -1…
 2 1               6 MULTIPOLYGON (((-7978692 5038758, -7978581 5039616, -79783…
 3 2               3 POLYGON ((-8290777 4763109, -8291389 4762454, -8291849 476…
 4 3               5 POLYGON ((-9799335 4551032, -9799438 4550965, -9799539 455…
 5 4               7 POLYGON ((-10176807 4921011, -10176847 4920961, -10177047 …
 6 5               9 MULTIPOLYGON (((-9250560 2835330, -9249947 2836819, -92493…
 7 6               4 POLYGON ((-9840190 3523448, -9841106 3523126, -9842801 352…
 8 7               4 POLYGON ((-10148561 3895849, -10148557 3895720, -10148574 …
 9 8               8 POLYGON ((-11473113 3798919, -11473113 3798905, -11473113 …
10 9               5 MULTIPOLYGON (((-19000283 2939831, -19000200 2940106, -190…

But this (explicitly unioning the geometry) does work:

us_states |>
  summarise(
    n_states = n_distinct(NAME),
    geometry = st_union(geometry),
    .by = DIVISION
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
   DIVISION n_states                       geometry
1         5        9 MULTIPOLYGON (((-9250560 28...
2         3        5 POLYGON ((-9799335 4551032,...
3         4        7 POLYGON ((-10176807 4921011...
4         1        6 MULTIPOLYGON (((-7978692 50...
5         8        8 POLYGON ((-11473113 3798919...
6         9        5 MULTIPOLYGON (((-19000283 2...
7         2        3 POLYGON ((-8290777 4763109,...
8         7        4 POLYGON ((-10148561 3895849...
9         6        4 POLYGON ((-9840190 3523448,...
10        0        5 MULTIPOLYGON (((-18726019 -...
us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME),
    geometry = st_union(geometry)
  ) |>
  st_as_sf() |>
  ggplot() +
  geom_sf(aes(fill = DIVISION)) +
  theme_minimal()

23 Measuring geometries with {sf}

Not all functions work with all geometry types!

  • st_area() only works with POLYGON and MULTIPOLYGON geometries
  • st_length() only works with LINESTRING and MULTILINSTRING geometries

24 Measuring geometries with {sf}

  • st_distance() requires at least two objects

25 Measuring geometries with {lwgeom} and {geosphere}

  • lwgeom::st_perimeter()
  • geosphere::bearing()
us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME)
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
# A tibble: 10 × 3
   DIVISION n_states                                                    geometry
   <chr>       <int>                                              <GEOMETRY [m]>
 1 0               5 MULTIPOLYGON (((-18726019 -1633065, -18725998 -1633025, -1…
 2 1               6 MULTIPOLYGON (((-7978692 5038758, -7978581 5039616, -79783…
 3 2               3 POLYGON ((-8290777 4763109, -8291389 4762454, -8291849 476…
 4 3               5 POLYGON ((-9799335 4551032, -9799438 4550965, -9799539 455…
 5 4               7 POLYGON ((-10176807 4921011, -10176847 4920961, -10177047 …
 6 5               9 MULTIPOLYGON (((-9250560 2835330, -9249947 2836819, -92493…
 7 6               4 POLYGON ((-9840190 3523448, -9841106 3523126, -9842801 352…
 8 7               4 POLYGON ((-10148561 3895849, -10148557 3895720, -10148574 …
 9 8               8 POLYGON ((-11473113 3798919, -11473113 3798905, -11473113 …
10 9               5 MULTIPOLYGON (((-19000283 2939831, -19000200 2940106, -190…