Data visualization practice

Load packages

Show code
library(readr)
library(here) # makes it easier to deal with file paths?
library(dplyr)
library(tidyr)
library(forcats) # for categorical data
library(janitor) # data wrangling i.e. clean_names !!

library(ggplot2)
library(plotly) # interactive graphics
library(DT)
library(leaflet)
Show code
# Read the raw monitoring data. here() builds the path from the project root,
# so the chunk does not depend on the working directory the Quarto document
# happens to be rendered from. Path components are passed separately so that
# here() inserts the separator itself.
delta_visits_raw <- read_csv(
  here("data", "Socioecological_monitoring_data.csv")
)

Data exploration

Show code
# List the column names of the raw data
names(delta_visits_raw)
 [1] "EcoRestore_approximate_location" "Reach"                          
 [3] "Latitude"                        "Longitude"                      
 [5] "Date"                            "Time_of_Day"                    
 [7] "sm_boat"                         "med_boat"                       
 [9] "lrg_boat"                        "bank_angler"                    
[11] "scientist"                       "cars"                           
[13] "notes"                          
Show code
# Show every column together with its type and its first few values
delta_visits_raw %>%
  glimpse()
Rows: 55
Columns: 13
$ EcoRestore_approximate_location <chr> "Decker Island", "Decker Island", "Dec…
$ Reach                           <chr> "Brannan to Decker Island", "Decker Is…
$ Latitude                        <dbl> 38.10587, 38.10587, 38.08456, 38.08456…
$ Longitude                       <dbl> -121.7064, -121.7064, -121.7204, -121.…
$ Date                            <date> 2017-07-07, 2017-07-07, 2017-07-07, 2…
$ Time_of_Day                     <chr> "unknown", "unknown", "unknown", "unkn…
$ sm_boat                         <dbl> 0, 0, 0, 0, 2, 0, 0, 7, 1, 0, 0, 0, 0,…
$ med_boat                        <dbl> 2, 4, 0, 1, 10, 0, 0, 1, 2, 0, 1, 6, 1…
$ lrg_boat                        <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,…
$ bank_angler                     <dbl> 1, 3, 0, 0, 0, 0, 0, 0, 2, 0, 0, 5, 0,…
$ scientist                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ cars                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ notes                           <chr> "no notes", "no notes", "Nobody or tra…
Show code
# Smallest and largest value of the Date column, i.e. the period covered
delta_visits_raw %>%
  pull(Date) %>%
  range()
[1] "2017-07-07" "2018-03-13"
Show code
# Distinct values recorded in the Time_of_Day column
delta_visits_raw %>%
  pull(Time_of_Day) %>%
  unique()
[1] "unknown" "morning"

Clean up the data

Show code
# Standardise all column names to lower snake_case with janitor, then print
# the cleaned tibble.
delta_visits <- clean_names(delta_visits_raw)
delta_visits
# A tibble: 55 × 13
   eco_restore_approximate_loc…¹ reach latitude longitude date       time_of_day
   <chr>                         <chr>    <dbl>     <dbl> <date>     <chr>      
 1 Decker Island                 Bran…     38.1     -122. 2017-07-07 unknown    
 2 Decker Island                 Deck…     38.1     -122. 2017-07-07 unknown    
 3 Decker Island                 Deck…     38.1     -122. 2017-07-07 unknown    
 4 Decker Island                 Deck…     38.1     -122. 2017-09-13 unknown    
 5 Decker Island                 Bran…     38.1     -122. 2017-11-07 unknown    
 6 Decker Island                 Deck…     38.1     -122. 2017-11-07 unknown    
 7 Decker Island                 Deck…     38.1     -122. 2017-11-07 unknown    
 8 Decker Island                 Bran…     38.1     -122. 2017-12-08 morning    
 9 Decker Island                 Deck…     38.1     -122. 2017-12-08 morning    
10 Decker Island                 Deck…     38.1     -122. 2017-12-08 morning    
# ℹ 45 more rows
# ℹ abbreviated name: ¹​eco_restore_approximate_location
# ℹ 7 more variables: sm_boat <dbl>, med_boat <dbl>, lrg_boat <dbl>,
#   bank_angler <dbl>, scientist <dbl>, cars <dbl>, notes <chr>
Show code
# Reshape to long format: the six visitor-count columns become one
# `visitor_type` / `quantity` pair per row.
visits_long <- delta_visits %>%
  pivot_longer(
    # `cols` is spelled out in full; the abbreviated `col` only worked through
    # partial argument matching.
    cols = c(sm_boat, med_boat, lrg_boat, bank_angler, scientist, cars),
    names_to = "visitor_type",
    values_to = "quantity"
  ) %>%
  rename(restore_loc = eco_restore_approximate_location) %>%
  select(-notes) # drop the free-text notes column

glimpse(visits_long)
Rows: 330
Columns: 8
$ restore_loc  <chr> "Decker Island", "Decker Island", "Decker Island", "Decke…
$ reach        <chr> "Brannan to Decker Island", "Brannan to Decker Island", "…
$ latitude     <dbl> 38.10587, 38.10587, 38.10587, 38.10587, 38.10587, 38.1058…
$ longitude    <dbl> -121.7064, -121.7064, -121.7064, -121.7064, -121.7064, -1…
$ date         <date> 2017-07-07, 2017-07-07, 2017-07-07, 2017-07-07, 2017-07-…
$ time_of_day  <chr> "unknown", "unknown", "unknown", "unknown", "unknown", "u…
$ visitor_type <chr> "sm_boat", "med_boat", "lrg_boat", "bank_angler", "scient…
$ quantity     <dbl> 0, 2, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
Show code
# Total visits per location, date and visitor type. group_by() only marks the
# groups; summarise() performs the aggregation, and `.groups = "drop"` returns
# an ungrouped tibble.
daily_visits_loc <- visits_long %>%
  group_by(restore_loc, date, visitor_type) %>%
  summarise(
    daily_visits = sum(quantity),
    .groups = "drop"
  )

glimpse(daily_visits_loc)
Rows: 144
Columns: 4
$ restore_loc  <chr> "Decker Island", "Decker Island", "Decker Island", "Decke…
$ date         <date> 2017-07-07, 2017-07-07, 2017-07-07, 2017-07-07, 2017-07-…
$ visitor_type <chr> "bank_angler", "cars", "lrg_boat", "med_boat", "scientist…
$ daily_visits <dbl> 4, 0, 0, 6, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 10, 0, 2, 2,…

Data visualization

Show code
# Box plot of daily small- and large-boat counts per location, restricted to
# counts below 30. Piping into ggplot() lets the data be filtered on the fly
# without storing an intermediate object.
daily_visits_loc %>%
  filter(visitor_type %in% c("sm_boat", "lrg_boat"), daily_visits < 30) %>%
  ggplot(aes(y = daily_visits, x = restore_loc)) +
  geom_boxplot() + # geom_violin(), geom_col(), etc. could be swapped in here
  labs(title = "Plot title")

Show code
# Bar chart of visits per location. `color` (bar outline) and `fill` (bar
# interior) are constants, so they are set outside aes().
daily_visits_loc %>%
  ggplot(aes(y = daily_visits, x = restore_loc)) +
  geom_col(fill = "steelblue", color = "red")

Show code
# Horizontal stacked bar chart: total visits per restoration location, with
# one fill colour per visitor type.
daily_visits_loc %>%
  # Mapping the location to y and the counts to x makes the bars horizontal
  ggplot(aes(y = restore_loc, x = daily_visits, fill = visitor_type)) +
  geom_col() +
  labs(
    x = "Number of visits",
    y = "Restoration location",
    fill = "Visitor type",
    title = "Total visits to the restoration areas",
    # Both years are stated: the data run from 2017-07-07 to 2018-03-13
    subtitle = "Sum of all visits between July 2017 and March 2018"
  ) +
  # Breaks every 20 visits; expand = c(0, 0) removes the padding so the bars
  # start directly at the axis
  scale_x_continuous(breaks = seq(0, 120, 20), expand = c(0, 0)) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.ticks.y = element_blank()
  )

Show code
# Attach each location's overall visit total to all of its rows, then convert
# the location to a factor whose levels are ordered by descending total.
daily_visits_totals <- daily_visits_loc %>%
  group_by(restore_loc) %>%
  mutate(total = sum(daily_visits)) %>%
  ungroup() %>%
  mutate(
    restore_loc = fct_reorder(.f = restore_loc, .x = desc(total))
  )

# Values are listed in order of appearance; the "Levels:" line of the output
# shows the new factor ordering.
daily_visits_totals %>%
  pull(restore_loc) %>%
  unique()
 [1] Decker Island            Grizzly Bay              Honker Bay/Chipps Island
 [4] North Delta              Prospect                 SJ River                
 [7] SW Suisun Marsh          Sherman Island           Twitchell Island        
[10] Wildlands               
10 Levels: Prospect Grizzly Bay North Delta ... Sherman Island
Show code
# One panel per restoration location (2 rows x 5 columns), with one bar per
# visitor type. The fill legend identifies the visitor types, so the x-axis
# text and ticks are removed.
facet_plot <- daily_visits_totals %>%
  ggplot(aes(x = visitor_type, y = daily_visits, fill = visitor_type)) +
  geom_col() +
  facet_wrap(
    ~restore_loc,
    nrow = 2,
    ncol = 5,
    scales = "free_y" # each panel gets its own y-axis range
  ) +
  scale_fill_viridis_d() +
  labs(
    title = "Total Number of Visits to Delta Restoration Areas",
    subtitle = "Sum of all visits during study period",
    x = "Type of visitor",
    y = "Number of Visits"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "bottom"
  )

facet_plot

Make it interactive

Show code
# Convert the static ggplot into an interactive plotly widget; the hover
# tooltip is limited to the x and y aesthetics.
facet_plot %>%
  ggplotly(tooltip = c("x", "y"))

Show an interactive table using DT

Show code
# Render the cleaned data as an interactive table with DT's default options;
# datatable() accepts further arguments to customise the display.
delta_visits %>%
  datatable()

Interactive map with leaflet

Show code
# One row per unique combination of location name and coordinates; rows with
# a missing latitude or longitude are removed because they cannot be mapped.
restoration_sites <- delta_visits_raw %>%
  clean_names() %>%
  distinct(
    restore_loc = eco_restore_approximate_location,
    latitude,
    longitude
  ) %>%
  drop_na(latitude, longitude)

# Base map with the default tiles and one marker per site; clicking a marker
# opens a popup with the location name.
leaflet(data = restoration_sites) %>%
  addTiles() %>%
  addMarkers(
    popup = ~restore_loc,
    lat = ~latitude,
    lng = ~longitude
  )