Creating and manipulating attributes for spatial data

2023-09-20

1 Setup

Today, we are going to use the {tidyverse} along with {sf} and two related packages: {lwgeom} and {units}.

library(tidyverse)
library(sf)
library(lwgeom)
library(units)

2 Getting started

For this session, we also need some data to look at. We are going to load data from the U.S. Census Bureau using the {tigris} package:

library(tigris)
options(tigris_use_cache = TRUE)

Use states() to load data on U.S. states and primary_roads() to load data on U.S. highways:

us_states <- states()

us_highways <- primary_roads()

3 Things to remember about spatial data

  • Fields
  • Objects

Check out the Wikipedia article on data models in GIS for more background on this topic.

4 Things to remember about sf and sfc objects

A sf object is a data frame with a sfc list-column.

us_states$geometry
Geometry set for 56 features 
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
Geodetic CRS:  NAD83
First 5 geometries:

5 Things to remember about sf and sfc objects

  • a sf object is a data frame with a sfc list-column
  • a sf object has a sf_column attribute (it isn’t always named geometry—use attributes() to take a look)
attributes(us_states)
$names
 [1] "REGION"   "DIVISION" "STATEFP"  "STATENS"  "GEOID"    "STUSPS"  
 [7] "NAME"     "LSAD"     "MTFCC"    "FUNCSTAT" "ALAND"    "AWATER"  
[13] "INTPTLAT" "INTPTLON" "geometry"

$row.names
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
[51] 51 52 53 54 55 56

$class
[1] "sf"         "data.frame"

$sf_column
[1] "geometry"

$agr
  REGION DIVISION  STATEFP  STATENS    GEOID   STUSPS     NAME     LSAD 
    <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     <NA> 
   MTFCC FUNCSTAT    ALAND   AWATER INTPTLAT INTPTLON 
    <NA>     <NA>     <NA>     <NA>     <NA>     <NA> 
Levels: constant aggregate identity

$tigris
[1] "state"

6 Things to remember about sf and sfc objects

  • a sf object is a data frame with a sfc list-column
  • a sf object has a sf_column attribute (it isn’t always named geometry—use attributes() to take a look)
  • sf and sfc objects use a coordinate reference system
st_crs(us_states)
Coordinate Reference System:
  User input: NAD83 
  wkt:
GEOGCRS["NAD83",
    DATUM["North American Datum 1983",
        ELLIPSOID["GRS 1980",6378137,298.257222101,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["latitude",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["longitude",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4269]]
st_crs(us_states$geometry)
Coordinate Reference System:
  User input: NAD83 
  wkt:
GEOGCRS["NAD83",
    DATUM["North American Datum 1983",
        ELLIPSOID["GRS 1980",6378137,298.257222101,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["latitude",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["longitude",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4269]]

7 Things to remember about sf and sfc objects

  • a sf object is a data frame with a sfc list-column
  • a sf object has a sf_column attribute (it isn’t always named geometry—use attributes() to take a look)
  • sf and sfc objects use a coordinate reference system
  • working with sf objects is slower than working with data frames—so drop the geometry if you don’t need it
st_drop_geometry(us_states)
   REGION DIVISION STATEFP  STATENS GEOID STUSPS
1       3        5      54 01779805    54     WV
2       3        5      12 00294478    12     FL
3       2        3      17 01779784    17     IL
4       2        4      27 00662849    27     MN
5       3        5      24 01714934    24     MD
6       1        1      44 01219835    44     RI
7       4        8      16 01779783    16     ID
8       1        1      33 01779794    33     NH
9       3        5      37 01027616    37     NC
10      1        1      50 01779802    50     VT
11      1        1      09 01779780    09     CT
12      3        5      10 01779781    10     DE
13      4        8      35 00897535    35     NM
14      4        9      06 01779778    06     CA
15      1        2      34 01779795    34     NJ
16      2        3      55 01779806    55     WI
17      4        9      41 01155107    41     OR
18      2        4      31 01779792    31     NE
19      1        2      42 01779798    42     PA
20      4        9      53 01779804    53     WA
21      3        7      22 01629543    22     LA
22      3        5      13 01705317    13     GA
23      3        6      01 01779775    01     AL
24      4        8      49 01455989    49     UT
25      2        3      39 01085497    39     OH
26      3        7      48 01779801    48     TX
27      4        8      08 01779779    08     CO
28      3        5      45 01779799    45     SC
29      3        7      40 01102857    40     OK
30      3        6      47 01325873    47     TN
31      4        8      56 01779807    56     WY
32      4        9      15 01779782    15     HI
33      2        4      38 01779797    38     ND
34      3        6      21 01779786    21     KY
35      9        0      78 01802710    78     VI
36      9        0      69 01779809    69     MP
37      9        0      66 01802705    66     GU
38      1        1      23 01779787    23     ME
39      1        2      36 01779796    36     NY
40      4        8      32 01779793    32     NV
41      4        9      02 01785533    02     AK
42      9        0      60 01802701    60     AS
43      2        3      26 01779789    26     MI
44      3        7      05 00068085    05     AR
45      3        6      28 01779790    28     MS
46      2        4      29 01779791    29     MO
47      4        8      30 00767982    30     MT
48      2        4      20 00481813    20     KS
49      2        3      18 00448508    18     IN
50      9        0      72 01779808    72     PR
51      2        4      46 01785534    46     SD
52      1        1      25 00606926    25     MA
53      3        5      51 01779803    51     VA
54      3        5      11 01702382    11     DC
55      2        4      19 01779785    19     IA
56      4        8      04 01779777    04     AZ
                                           NAME LSAD MTFCC FUNCSTAT
1                                 West Virginia   00 G4000        A
2                                       Florida   00 G4000        A
3                                      Illinois   00 G4000        A
4                                     Minnesota   00 G4000        A
5                                      Maryland   00 G4000        A
6                                  Rhode Island   00 G4000        A
7                                         Idaho   00 G4000        A
8                                 New Hampshire   00 G4000        A
9                                North Carolina   00 G4000        A
10                                      Vermont   00 G4000        A
11                                  Connecticut   00 G4000        A
12                                     Delaware   00 G4000        A
13                                   New Mexico   00 G4000        A
14                                   California   00 G4000        A
15                                   New Jersey   00 G4000        A
16                                    Wisconsin   00 G4000        A
17                                       Oregon   00 G4000        A
18                                     Nebraska   00 G4000        A
19                                 Pennsylvania   00 G4000        A
20                                   Washington   00 G4000        A
21                                    Louisiana   00 G4000        A
22                                      Georgia   00 G4000        A
23                                      Alabama   00 G4000        A
24                                         Utah   00 G4000        A
25                                         Ohio   00 G4000        A
26                                        Texas   00 G4000        A
27                                     Colorado   00 G4000        A
28                               South Carolina   00 G4000        A
29                                     Oklahoma   00 G4000        A
30                                    Tennessee   00 G4000        A
31                                      Wyoming   00 G4000        A
32                                       Hawaii   00 G4000        A
33                                 North Dakota   00 G4000        A
34                                     Kentucky   00 G4000        A
35                 United States Virgin Islands   00 G4000        A
36 Commonwealth of the Northern Mariana Islands   00 G4000        A
37                                         Guam   00 G4000        A
38                                        Maine   00 G4000        A
39                                     New York   00 G4000        A
40                                       Nevada   00 G4000        A
41                                       Alaska   00 G4000        A
42                               American Samoa   00 G4000        A
43                                     Michigan   00 G4000        A
44                                     Arkansas   00 G4000        A
45                                  Mississippi   00 G4000        A
46                                     Missouri   00 G4000        A
47                                      Montana   00 G4000        A
48                                       Kansas   00 G4000        A
49                                      Indiana   00 G4000        A
50                                  Puerto Rico   00 G4000        A
51                                 South Dakota   00 G4000        A
52                                Massachusetts   00 G4000        A
53                                     Virginia   00 G4000        A
54                         District of Columbia   00 G4000        A
55                                         Iowa   00 G4000        A
56                                      Arizona   00 G4000        A
          ALAND       AWATER    INTPTLAT     INTPTLON
1  6.226646e+10    489045863 +38.6472854 -080.6183274
2  1.389628e+11  45971472526 +28.3989775 -082.5143005
3  1.437785e+11   6216539665 +40.1028754 -089.1526108
4  2.062448e+11  18937184315 +46.3159573 -094.1996043
5  2.515177e+10   6979295311 +38.9466584 -076.6744939
6  2.677763e+09   1323686975 +41.5964850 -071.5264901
7  2.140499e+11   2391592787 +44.3484222 -114.5588538
8  2.319013e+10   1025960758 +43.6726907 -071.5843145
9  1.259356e+11  13453835222 +35.5397100 -079.1308636
10 2.387257e+10   1030754610 +44.0589536 -072.6710173
11 1.254167e+10   1816447681 +41.5798637 -072.7466572
12 5.046704e+09   1399207462 +38.9985661 -075.4416440
13 3.141986e+11    726463825 +34.4346843 -106.1316181
14 4.036736e+11  20291712025 +37.1551773 -119.5434183
15 1.904913e+10   3532872678 +40.1072744 -074.6652012
16 1.402925e+11  29343193162 +44.6309071 -089.7093916
17 2.486303e+11   6169061220 +43.9717125 -120.6229578
18 1.989551e+11   1373812904 +41.5433053 -099.8118646
19 1.158821e+11   3397575101 +40.9046042 -077.8275233
20 1.721189e+11  12548840784 +47.4073238 -120.5757999
21 1.119209e+11  23730743631 +30.8634368 -091.7987173
22 1.494863e+11   4418716153 +32.6295789 -083.4235109
23 1.311850e+11   4582333181 +32.7395785 -086.8434469
24 2.139213e+11   5963769092 +39.3349925 -111.6563326
25 1.058236e+11  10274734976 +40.4149297 -082.7119975
26 6.766856e+11  18974391187 +31.4347032 -099.2818238
27 2.684187e+11   1185778676 +38.9937669 -105.5087122
28 7.786606e+10   5074384036 +33.8741776 -080.8542639
29 1.776647e+11   3373204169 +35.5900815 -097.4867789
30 1.067924e+11   2322190840 +35.8584600 -086.3496339
31 2.514587e+11   1867504105 +42.9896591 -107.5443922
32 1.663426e+10  11777541585 +19.8281671 -155.4950421
33 1.786943e+11   4414779344 +47.4421740 -100.4608258
34 1.022666e+11   2384240769 +37.5336844 -085.2929801
35 3.480219e+08   1550236187 +18.3392359 -064.9500433
36 4.722925e+08   4644252458 +15.0010865 +145.6181702
37 5.435558e+08    934337453 +13.4417451 +144.7719021
38 7.988817e+10  11745192141 +45.4092843 -068.6666160
39 1.220491e+11  19257186417 +42.9133974 -075.5962723
40 2.845374e+11   1839508127 +39.3310928 -116.6151469
41 1.478944e+12 245377731557 +63.3473560 -152.8397334
42 1.977591e+08   1307243751 -14.2671590 -170.6682674
43 1.466191e+11 103867917874 +44.8441768 -085.6604907
44 1.346607e+11   3121974727 +34.8955256 -092.4446262
45 1.215339e+11   3914351266 +32.6864714 -089.6561377
46 1.780523e+11   2487526202 +38.3507500 -092.4567826
47 3.769732e+11   3867186457 +47.0511770 -109.6348174
48 2.117536e+11   1345843968 +38.4985464 -098.3834298
49 9.278861e+10   1542001983 +39.9013136 -086.2919129
50 8.869030e+09   4922249087 +18.2176480 -066.4107992
51 1.963416e+11   3387681983 +44.4467957 -100.2381762
52 2.020434e+10   7130708927 +42.1565196 -071.4895915
53 1.022582e+11   8528072639 +37.5222512 -078.6681938
54 1.583162e+08     18709787 +38.9042474 -077.0165167
55 1.446596e+11   1086089872 +42.0700243 -093.4933473
56 2.943659e+11    853990728 +34.2039362 -111.6063449

8 Things to remember about coordinate reference systems

  • objects must share the same coordinate reference system if you are using them together
  • coordinate reference systems are stored as attributes for sfc and sf objects (sfg objects don’t have a CRS)
  • coordinate reference systems have units
  • geographic and projected coordinate reference systems are not the same
  • coordinate reference systems can be missing and they can be wrong
# bad: assigning a CRS that doesn't match the geometry
st_crs(us_states) <- 3857

# good: using st_transform to convert the geometry to a new CRS
st_transform(us_states, crs = 3857)

Get the objects into the same CRS before we continue:

us_states <- st_transform(us_states, crs = 3857)

us_highways <- st_transform(us_highways, crs = 3857)

9 Things to think about

  • sf objects are not the only way to represent spatial data in R

  • If you are working with more than one sf or sfc, the objects must use the same coordinate reference system to use them together.

# state

10 What is an “attribute” in GIS?

Within a GIS desktop application, an attribute may be known as a field.

When we talk about tidy data frames, an attribute is equivalent to a variable which is represented as a column in a data frame.

But! Feature geometry is also stored as a column.

A single sfc object (or list-column) can contain more than one feature (shapefiles can’t do this BTW!) using MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, or GEOMETRYCOLLECTION geometry types.

And! Objects in R can also have attributes and these are not the same thing as attributes for spatial data. Try not to get them mixed up.

11 What is an attribute?

So (to recap) an attribute in GIS can also be called a…

  • field in a desktop GIS application
  • variable in tidy data
  • column in a data frame

12 What types of attributes exist?

Attributes are “properties of features (‘things’) that do not describe the feature’s geometry”.

Here are the attributes for us_states:

glimpse(st_drop_geometry(us_states))
Rows: 56
Columns: 14
$ REGION   <chr> "3", "3", "2", "2", "3", "1", "4", "1", "3", "1", "1", "3", "…
$ DIVISION <chr> "5", "5", "3", "4", "5", "1", "8", "1", "5", "1", "1", "5", "…
$ STATEFP  <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STATENS  <chr> "01779805", "00294478", "01779784", "00662849", "01714934", "…
$ GEOID    <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STUSPS   <chr> "WV", "FL", "IL", "MN", "MD", "RI", "ID", "NH", "NC", "VT", "…
$ NAME     <chr> "West Virginia", "Florida", "Illinois", "Minnesota", "Marylan…
$ LSAD     <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00", "…
$ MTFCC    <chr> "G4000", "G4000", "G4000", "G4000", "G4000", "G4000", "G4000"…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ ALAND    <dbl> 62266456923, 138962819934, 143778515726, 206244837557, 251517…
$ AWATER   <dbl> 489045863, 45971472526, 6216539665, 18937184315, 6979295311, …
$ INTPTLAT <chr> "+38.6472854", "+28.3989775", "+40.1028754", "+46.3159573", "…
$ INTPTLON <chr> "-080.6183274", "-082.5143005", "-089.1526108", "-094.1996043…

Attributes can have:

  • point support: the value applies to every point individually, or
  • block support: the value is a summary for all points in the geometry

The relationship between attributes and geometry can be described as:

  • constant: the value is valid everywhere in or over the geometry
  • aggregate: the value is associated with the entire geometry

What is an example of an attribute with a constant relationship?

glimpse(us_states)
Rows: 56
Columns: 15
$ REGION   <chr> "3", "3", "2", "2", "3", "1", "4", "1", "3", "1", "1", "3", "…
$ DIVISION <chr> "5", "5", "3", "4", "5", "1", "8", "1", "5", "1", "1", "5", "…
$ STATEFP  <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STATENS  <chr> "01779805", "00294478", "01779784", "00662849", "01714934", "…
$ GEOID    <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STUSPS   <chr> "WV", "FL", "IL", "MN", "MD", "RI", "ID", "NH", "NC", "VT", "…
$ NAME     <chr> "West Virginia", "Florida", "Illinois", "Minnesota", "Marylan…
$ LSAD     <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00", "…
$ MTFCC    <chr> "G4000", "G4000", "G4000", "G4000", "G4000", "G4000", "G4000"…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ ALAND    <dbl> 62266456923, 138962819934, 143778515726, 206244837557, 251517…
$ AWATER   <dbl> 489045863, 45971472526, 6216539665, 18937184315, 6979295311, …
$ INTPTLAT <chr> "+38.6472854", "+28.3989775", "+40.1028754", "+46.3159573", "…
$ INTPTLON <chr> "-080.6183274", "-082.5143005", "-089.1526108", "-094.1996043…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((-8655578 47..., MULTIPOLYGON (((…

What is an example with an aggregate relationship?

glimpse(us_states)
Rows: 56
Columns: 15
$ REGION   <chr> "3", "3", "2", "2", "3", "1", "4", "1", "3", "1", "1", "3", "…
$ DIVISION <chr> "5", "5", "3", "4", "5", "1", "8", "1", "5", "1", "1", "5", "…
$ STATEFP  <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STATENS  <chr> "01779805", "00294478", "01779784", "00662849", "01714934", "…
$ GEOID    <chr> "54", "12", "17", "27", "24", "44", "16", "33", "37", "50", "…
$ STUSPS   <chr> "WV", "FL", "IL", "MN", "MD", "RI", "ID", "NH", "NC", "VT", "…
$ NAME     <chr> "West Virginia", "Florida", "Illinois", "Minnesota", "Marylan…
$ LSAD     <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "00", "…
$ MTFCC    <chr> "G4000", "G4000", "G4000", "G4000", "G4000", "G4000", "G4000"…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ ALAND    <dbl> 62266456923, 138962819934, 143778515726, 206244837557, 251517…
$ AWATER   <dbl> 489045863, 45971472526, 6216539665, 18937184315, 6979295311, …
$ INTPTLAT <chr> "+38.6472854", "+28.3989775", "+40.1028754", "+46.3159573", "…
$ INTPTLON <chr> "-080.6183274", "-082.5143005", "-089.1526108", "-094.1996043…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((-8655578 47..., MULTIPOLYGON (((…

Attributes can be:

  • extensive: corresponding to amounts, associated with a physical size (length, area, volume, counts of items)
  • intensive:

13 What is an “attribute domain”?

14 What are we working with?

When we do data analysis using {dplyr}, there are three types of functions we use most often:

  • Boolean operators or predicates
  • Window or vector functions
  • Summary or analysis functions

There are similar

15 Predicate functions for geometries with {sf}

{sf} includes “vectorized” logical operators or tests that work with geometry including:

  • st_is()
  • st_is_valid()
  • st_is_empty()
st_is(us_states, "POLYGON")
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
st_is(us_states, "MULTIPOLYGON")
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

16 Predicate functions for geometries with {sf}

{sf} also includes more than a dozen predicate functions for working with pairs of simple geometries including:

  • st_intersects
  • st_disjoint
  • st_contains
  • st_covers
  • st_is_within_distance

17 Using predicate functions for spatial joins and filters

st_filter() and st_join() are two functions that rely on these predicate functions to work.

maryland <- filter(us_states, NAME == "Maryland")

st_filter(us_highways, maryland)
Simple feature collection with 470 features and 4 fields
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: -8881626 ymin: 4621400 xmax: -8367444 ymax: 4897471
Projected CRS: WGS 84 / Pseudo-Mercator
First 10 features:
        LINEARID      FULLNAME RTTYP MTFCC                       geometry
1  1104258795090 Salisbury Byp     M S1100 LINESTRING (-8416214 463670...
2  1103075320064 Salisbury Byp     M S1100 LINESTRING (-8414039 463644...
3  1104258795072 Salisbury Byp     M S1100 LINESTRING (-8417144 463654...
4  1104258845568 Salisbury Byp     M S1100 LINESTRING (-8413964 463644...
5   110191599447 Salisbury Byp     M S1100 LINESTRING (-8409248 463421...
6   110191599448 Salisbury Byp     M S1100 LINESTRING (-8419516 462217...
7  1102149806067 Salisbury Byp     M S1100 LINESTRING (-8412863 463617...
8  1102149813008 Salisbury Byp     M S1100 LINESTRING (-8412725 463596...
9  1102149806121 Salisbury Byp     M S1100 LINESTRING (-8412944 463622...
10 1102166635559 Salisbury Byp     M S1100 LINESTRING (-8412583 463576...
st_filter(us_states, maryland, .predicate = st_disjoint)
Simple feature collection with 50 features and 14 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
First 10 features:
   REGION DIVISION STATEFP  STATENS GEOID STUSPS           NAME LSAD MTFCC
1       3        5      12 00294478    12     FL        Florida   00 G4000
2       2        3      17 01779784    17     IL       Illinois   00 G4000
3       2        4      27 00662849    27     MN      Minnesota   00 G4000
4       1        1      44 01219835    44     RI   Rhode Island   00 G4000
5       4        8      16 01779783    16     ID          Idaho   00 G4000
6       1        1      33 01779794    33     NH  New Hampshire   00 G4000
7       3        5      37 01027616    37     NC North Carolina   00 G4000
8       1        1      50 01779802    50     VT        Vermont   00 G4000
9       1        1      09 01779780    09     CT    Connecticut   00 G4000
10      4        8      35 00897535    35     NM     New Mexico   00 G4000
   FUNCSTAT        ALAND      AWATER    INTPTLAT     INTPTLON
1         A 138962819934 45971472526 +28.3989775 -082.5143005
2         A 143778515726  6216539665 +40.1028754 -089.1526108
3         A 206244837557 18937184315 +46.3159573 -094.1996043
4         A   2677763373  1323686975 +41.5964850 -071.5264901
5         A 214049908397  2391592787 +44.3484222 -114.5588538
6         A  23190126218  1025960758 +43.6726907 -071.5843145
7         A 125935585728 13453835222 +35.5397100 -079.1308636
8         A  23872569964  1030754610 +44.0589536 -072.6710173
9         A  12541666998  1816447681 +41.5798637 -072.7466572
10        A 314198573403   726463825 +34.4346843 -106.1316181
                         geometry
1  MULTIPOLYGON (((-9251622 28...
2  MULTIPOLYGON (((-9784141 46...
3  MULTIPOLYGON (((-10610961 6...
4  MULTIPOLYGON (((-7979248 50...
5  MULTIPOLYGON (((-12952962 6...
6  MULTIPOLYGON (((-7885747 53...
7  MULTIPOLYGON (((-8671763 43...
8  MULTIPOLYGON (((-8019664 54...
9  MULTIPOLYGON (((-8073769 50...
10 MULTIPOLYGON (((-11473113 3...

18 Creating new variables with geometry

  • Measuring feature geometries
  • Comparing feature geometries
  • Joining data based on feature geometries
maryland <- filter(us_states, NAME == "Maryland")

19 Measuring geometries with {sf}

20 Measuring geometries with {sf}

{sf} includes a few different functions for measuring geometries:

  • st_area() (only works with POLYGON and MULTIPOLYGON geometries)
  • st_length() (only wrks with LINESTRING and MULTILINSTRING geometries)
  • st_distance() (requires a pair of objects)

21 Measuring geometries with {sf}

All of these functions are vectorized meaning that they can operate independently on each feature in a sf or sfc object.

They support both sf inputs (data frames) and sfc inputs (lists)—but they always return a vector:

st_area(maryland)
53204014058 [m^2]
st_area(maryland$geometry)
53204014058 [m^2]

22 Measuring geometries with {sf}

But, remember, dplyr::mutate() is designed to work with vectorized functions so you can do us a measurement function inside mutate():

maryland |>
  mutate(
    area = st_area(geometry)
  )
Simple feature collection with 1 feature and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -8848525 ymin: 4563419 xmax: -8347435 ymax: 4825776
Projected CRS: WGS 84 / Pseudo-Mercator
  REGION DIVISION STATEFP  STATENS GEOID STUSPS     NAME LSAD MTFCC FUNCSTAT
1      3        5      24 01714934    24     MD Maryland   00 G4000        A
        ALAND     AWATER    INTPTLAT     INTPTLON
1 25151771744 6979295311 +38.9466584 -076.6744939
                        geometry              area
1 MULTIPOLYGON (((-8433120 47... 53204014058 [m^2]

23 Measuring geometries with {sf}

You can even work with multiple geometries using this same approach:

us_states |>
  mutate(
    area = st_area(geometry),
    distance_to_maryland = st_distance(geometry, maryland)
  )
Simple feature collection with 56 features and 16 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
First 10 features:
   REGION DIVISION STATEFP  STATENS GEOID STUSPS           NAME LSAD MTFCC
1       3        5      54 01779805    54     WV  West Virginia   00 G4000
2       3        5      12 00294478    12     FL        Florida   00 G4000
3       2        3      17 01779784    17     IL       Illinois   00 G4000
4       2        4      27 00662849    27     MN      Minnesota   00 G4000
5       3        5      24 01714934    24     MD       Maryland   00 G4000
6       1        1      44 01219835    44     RI   Rhode Island   00 G4000
7       4        8      16 01779783    16     ID          Idaho   00 G4000
8       1        1      33 01779794    33     NH  New Hampshire   00 G4000
9       3        5      37 01027616    37     NC North Carolina   00 G4000
10      1        1      50 01779802    50     VT        Vermont   00 G4000
   FUNCSTAT        ALAND      AWATER    INTPTLAT     INTPTLON
1         A  62266456923   489045863 +38.6472854 -080.6183274
2         A 138962819934 45971472526 +28.3989775 -082.5143005
3         A 143778515726  6216539665 +40.1028754 -089.1526108
4         A 206244837557 18937184315 +46.3159573 -094.1996043
5         A  25151771744  6979295311 +38.9466584 -076.6744939
6         A   2677763373  1323686975 +41.5964850 -071.5264901
7         A 214049908397  2391592787 +44.3484222 -114.5588538
8         A  23190126218  1025960758 +43.6726907 -071.5843145
9         A 125935585728 13453835222 +35.5397100 -079.1308636
10        A  23872569964  1030754610 +44.0589536 -072.6710173
                         geometry               area distance_to_maryland
1  MULTIPOLYGON (((-8655578 47... 103045932411 [m^2]              0.0 [m]
2  MULTIPOLYGON (((-9251622 28... 240290408316 [m^2]        1122737.0 [m]
3  MULTIPOLYGON (((-9784141 46... 257089838322 [m^2]         893538.7 [m]
4  MULTIPOLYGON (((-10610961 6... 473223859914 [m^2]        1423100.6 [m]
5  MULTIPOLYGON (((-8433120 47...  53204014058 [m^2]              0.0 [m]
6  MULTIPOLYGON (((-7979248 50...   7160497121 [m^2]         490274.7 [m]
7  MULTIPOLYGON (((-12952962 6... 424675068387 [m^2]        3530363.1 [m]
8  MULTIPOLYGON (((-7885747 53...  46326405214 [m^2]         579065.3 [m]
9  MULTIPOLYGON (((-8671763 43... 210941928191 [m^2]         186785.1 [m]
10 MULTIPOLYGON (((-8019664 54...  48271922011 [m^2]         527897.6 [m]

This works to aggregate features by division:

us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME)
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
# A tibble: 10 × 3
   DIVISION n_states                                                    geometry
   <chr>       <int>                                              <GEOMETRY [m]>
 1 0               5 MULTIPOLYGON (((-18726019 -1633065, -18725998 -1633025, -1…
 2 1               6 MULTIPOLYGON (((-7978692 5038758, -7978581 5039616, -79783…
 3 2               3 POLYGON ((-8290777 4763109, -8291389 4762454, -8291849 476…
 4 3               5 POLYGON ((-9799335 4551032, -9799438 4550965, -9799539 455…
 5 4               7 POLYGON ((-10176807 4921011, -10176847 4920961, -10177047 …
 6 5               9 MULTIPOLYGON (((-9250560 2835330, -9249947 2836819, -92493…
 7 6               4 POLYGON ((-9840190 3523448, -9841106 3523126, -9842801 352…
 8 7               4 POLYGON ((-10148561 3895849, -10148557 3895720, -10148574 …
 9 8               8 POLYGON ((-11473113 3798919, -11473113 3798905, -11473113 …
10 9               5 MULTIPOLYGON (((-19000283 2939831, -19000200 2940106, -190…
us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME)
  ) |>
  ggplot() +
  geom_sf(aes(fill = DIVISION)) +
  theme_minimal()

This doesn’t entirely work (yet):

us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME) # ,
    # .by = DIVISION
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
# A tibble: 10 × 3
   DIVISION n_states                                                    geometry
   <chr>       <int>                                              <GEOMETRY [m]>
 1 0               5 MULTIPOLYGON (((-18726019 -1633065, -18725998 -1633025, -1…
 2 1               6 MULTIPOLYGON (((-7978692 5038758, -7978581 5039616, -79783…
 3 2               3 POLYGON ((-8290777 4763109, -8291389 4762454, -8291849 476…
 4 3               5 POLYGON ((-9799335 4551032, -9799438 4550965, -9799539 455…
 5 4               7 POLYGON ((-10176807 4921011, -10176847 4920961, -10177047 …
 6 5               9 MULTIPOLYGON (((-9250560 2835330, -9249947 2836819, -92493…
 7 6               4 POLYGON ((-9840190 3523448, -9841106 3523126, -9842801 352…
 8 7               4 POLYGON ((-10148561 3895849, -10148557 3895720, -10148574 …
 9 8               8 POLYGON ((-11473113 3798919, -11473113 3798905, -11473113 …
10 9               5 MULTIPOLYGON (((-19000283 2939831, -19000200 2940106, -190…

But this (explicitly unioning the geometry) does work:

us_states |>
  summarise(
    n_states = n_distinct(NAME),
    geometry = st_union(geometry),
    .by = DIVISION
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
   DIVISION n_states                       geometry
1         5        9 MULTIPOLYGON (((-9250560 28...
2         3        5 POLYGON ((-9799335 4551032,...
3         4        7 POLYGON ((-10176807 4921011...
4         1        6 MULTIPOLYGON (((-7978692 50...
5         8        8 POLYGON ((-11473113 3798919...
6         9        5 MULTIPOLYGON (((-19000283 2...
7         2        3 POLYGON ((-8290777 4763109,...
8         7        4 POLYGON ((-10148561 3895849...
9         6        4 POLYGON ((-9840190 3523448,...
10        0        5 MULTIPOLYGON (((-18726019 -...
us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME),
    geometry = st_union(geometry)
  ) |>
  st_as_sf() |>
  ggplot() +
  geom_sf(aes(fill = DIVISION)) +
  theme_minimal()

24 Measuring geometries with {sf}

Not all functions work with all geometry types!

  • st_area() only works with POLYGON and MULTIPOLYGON geometries
  • st_length() only works with LINESTRING and MULTILINSTRING geometries

25 Measuring geometries with {sf}

  • st_distance() requires at least two objects

26 Measuring geometries with {lwgeom} and {geosphere}

  • lwgeom::st_perimeter()
  • geosphere::bearing()
us_states |>
  group_by(DIVISION) |>
  summarise(
    n_states = n_distinct(NAME)
  )
Simple feature collection with 10 features and 2 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -19951910 ymin: -1643353 xmax: 20021890 ymax: 11554350
Projected CRS: WGS 84 / Pseudo-Mercator
# A tibble: 10 × 3
   DIVISION n_states                                                    geometry
   <chr>       <int>                                              <GEOMETRY [m]>
 1 0               5 MULTIPOLYGON (((-18726019 -1633065, -18725998 -1633025, -1…
 2 1               6 MULTIPOLYGON (((-7978692 5038758, -7978581 5039616, -79783…
 3 2               3 POLYGON ((-8290777 4763109, -8291389 4762454, -8291849 476…
 4 3               5 POLYGON ((-9799335 4551032, -9799438 4550965, -9799539 455…
 5 4               7 POLYGON ((-10176807 4921011, -10176847 4920961, -10177047 …
 6 5               9 MULTIPOLYGON (((-9250560 2835330, -9249947 2836819, -92493…
 7 6               4 POLYGON ((-9840190 3523448, -9841106 3523126, -9842801 352…
 8 7               4 POLYGON ((-10148561 3895849, -10148557 3895720, -10148574 …
 9 8               8 POLYGON ((-11473113 3798919, -11473113 3798905, -11473113 …
10 9               5 MULTIPOLYGON (((-19000283 2939831, -19000200 2940106, -190…