Group a SpatVector by one or more variables — group-by.SpatVector • tidyterra

Most data operations are done on groups defined by variables. group_by.SpatVector() adds new attributes to an existing SpatVector indicating the corresponding groups. See Methods.

Usage

# S3 method for class 'SpatVector'
group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data))

# S3 method for class 'SpatVector'
ungroup(x, ...)

Arguments

.data, x: A SpatVector object. See Methods.
...: <data-masking> In group_by(), variables or computations to group by. Computations are always done on the ungrouped data frame. To perform computations on the grouped data, you need to use a separate mutate() step before the group_by(). Computations are not allowed in nest_by(). In ungroup(), variables to remove from the grouping.
.add: When FALSE, the default, group_by() will override existing groups. To add to the existing groups, use .add = TRUE.
.drop: Drop groups formed by factor levels that don't appear in the data? The default is TRUE except when .data has been previously grouped with .drop = FALSE. See group_by_drop_default() for details.

Value

A SpatVector object with an additional attribute.

Details

See Details on dplyr::group_by().

Methods

Implementation of the generic dplyr::group_by() family functions for SpatVector objects.

When mixing terra and dplyr syntax on a grouped SpatVector (i.e, subsetting a SpatVector like v[1:3,1:2]) the groups attribute can be corrupted. tidyterra would try to re-group the SpatVector. This would be triggered the next time you use a dplyr verb on your SpatVector.

Note also that some operations (as terra::spatSample()) would create a new SpatVector. In these cases, the result won't preserve the groups attribute. Use group_by() to re-group.

Examples

# \donttest{

library(terra)
f <- system.file("ex/lux.shp", package = "terra")
p <- vect(f)

by_name1 <- p |> group_by(NAME_1)

# grouping doesn't change how the SpatVector looks
by_name1
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 12, 6  (geometries, attributes)
#>  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
#>  source      : lux.shp
#>  coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#>  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA       POP
#>  type        : <num>    <chr> <num>    <chr> <num>     <num>
#>  values      :     1 Diekirch     1 Clervaux   312 1.808e+04
#>                    1 Diekirch     2 Diekirch   218 3.254e+04
#>                    1 Diekirch     3  Redange   259 1.866e+04

# But add metadata for grouping: See the coercion to tibble

# Not grouped
p_tbl <- as_tibble(p)
class(p_tbl)
#> [1] "tbl_df"     "tbl"        "data.frame"
head(p_tbl, 3)
#> # A tibble: 3 × 6
#>    ID_1 NAME_1    ID_2 NAME_2    AREA   POP
#>   <dbl> <chr>    <dbl> <chr>    <dbl> <dbl>
#> 1     1 Diekirch     1 Clervaux   312 18081
#> 2     1 Diekirch     2 Diekirch   218 32543
#> 3     1 Diekirch     3 Redange    259 18664

# Grouped
by_name1_tbl <- as_tibble(by_name1)
class(by_name1_tbl)
#> [1] "grouped_df" "tbl_df"     "tbl"        "data.frame"
head(by_name1_tbl, 3)
#> # A tibble: 3 × 6
#> # Groups:   NAME_1 [1]
#>    ID_1 NAME_1    ID_2 NAME_2    AREA   POP
#>   <dbl> <chr>    <dbl> <chr>    <dbl> <dbl>
#> 1     1 Diekirch     1 Clervaux   312 18081
#> 2     1 Diekirch     2 Diekirch   218 32543
#> 3     1 Diekirch     3 Redange    259 18664

# It changes how it acts with the other dplyr verbs:
by_name1 |> summarise(
  pop = mean(POP),
  area = sum(AREA)
)
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 3, 3  (geometries, attributes)
#>  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
#>  coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#>  names       :       NAME_1       pop  area
#>  type        :        <chr>     <num> <num>
#>  values      :     Diekirch 1.824e+04  1128
#>                Grevenmacher  2.37e+04   527
#>                  Luxembourg 1.099e+05   906

# Each call to summarise() removes a layer of grouping
by_name2_name1 <- p |> group_by(NAME_2, NAME_1)

by_name2_name1
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 12, 6  (geometries, attributes)
#>  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
#>  source      : lux.shp
#>  coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#>  names       :  ID_1   NAME_1  ID_2   NAME_2  AREA       POP
#>  type        : <num>    <chr> <num>    <chr> <num>     <num>
#>  values      :     1 Diekirch     1 Clervaux   312 1.808e+04
#>                    1 Diekirch     2 Diekirch   218 3.254e+04
#>                    1 Diekirch     3  Redange   259 1.866e+04
group_data(by_name2_name1)
#> # A tibble: 12 × 3
#>    NAME_2           NAME_1             .rows
#>    <chr>            <chr>        <list<int>>
#>  1 Capellen         Luxembourg           [1]
#>  2 Clervaux         Diekirch             [1]
#>  3 Diekirch         Diekirch             [1]
#>  4 Echternach       Grevenmacher         [1]
#>  5 Esch-sur-Alzette Luxembourg           [1]
#>  6 Grevenmacher     Grevenmacher         [1]
#>  7 Luxembourg       Luxembourg           [1]
#>  8 Mersch           Luxembourg           [1]
#>  9 Redange          Diekirch             [1]
#> 10 Remich           Grevenmacher         [1]
#> 11 Vianden          Diekirch             [1]
#> 12 Wiltz            Diekirch             [1]

by_name2 <- by_name2_name1 |> summarise(n = dplyr::n())
by_name2
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 12, 3  (geometries, attributes)
#>  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
#>  coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#>  names       :   NAME_2     NAME_1     n
#>  type        :    <chr>      <chr> <int>
#>  values      : Capellen Luxembourg     1
#>                Clervaux   Diekirch     1
#>                Diekirch   Diekirch     1
group_data(by_name2)
#> # A tibble: 12 × 2
#>    NAME_2                 .rows
#>    <chr>            <list<int>>
#>  1 Capellen                 [1]
#>  2 Clervaux                 [1]
#>  3 Diekirch                 [1]
#>  4 Echternach               [1]
#>  5 Esch-sur-Alzette         [1]
#>  6 Grevenmacher             [1]
#>  7 Luxembourg               [1]
#>  8 Mersch                   [1]
#>  9 Redange                  [1]
#> 10 Remich                   [1]
#> 11 Vianden                  [1]
#> 12 Wiltz                    [1]

# To removing grouping, use ungroup
by_name2 |>
  ungroup() |>
  summarise(n = sum(n))
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 1, 1  (geometries, attributes)
#>  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
#>  coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#>  names       :     n
#>  type        : <int>
#>  values      :    12

# By default, group_by() overrides existing grouping
by_name2_name1 |>
  group_by(ID_1, ID_2) |>
  group_vars()
#> [1] "ID_1" "ID_2"

# Use add = TRUE to instead append
by_name2_name1 |>
  group_by(ID_1, ID_2, .add = TRUE) |>
  group_vars()
#> [1] "NAME_2" "NAME_1" "ID_1"   "ID_2"  

# You can group by expressions: this is a short-hand
# for a mutate() followed by a group_by()
p |>
  group_by(ID_COMB = ID_1 * 100 / ID_2) |>
  relocate(ID_COMB, .before = 1)
#>  class       : SpatVector 
#>  geometry    : polygons 
#>  dimensions  : 12, 7  (geometries, attributes)
#>  extent      : 5.74414, 6.528252, 49.44781, 50.18162  (xmin, xmax, ymin, ymax)
#>  source      : lux.shp
#>  coord. ref. : lon/lat WGS 84 (EPSG:4326) 
#>  names       : ID_COMB  ID_1   NAME_1  ID_2   NAME_2  AREA       POP
#>  type        :   <num> <num>    <chr> <num>    <chr> <num>     <num>
#>  values      :     100     1 Diekirch     1 Clervaux   312 1.808e+04
#>                     50     1 Diekirch     2 Diekirch   218 3.254e+04
#>                  33.33     1 Diekirch     3  Redange   259 1.866e+04
# }

Group a `SpatVector` by one or more variables