TidyTuesday Week 20: Water Quality at Sydney Beaches
This week we’re exploring the water quality of Sydney’s iconic beaches. The data is available at the New South Wales State Government Beachwatch website. Beachwatch and its partners monitor water quality at swim sites to ensure that recreational water environments are managed as safely as possible so that as many people as possible can benefit from using the water.
TidyTuesday
Data Visualization
R Programming
2025
Author
Peter Gray
Published
May 20, 2025
Water Quality
1. R code
Show code
# TidyTuesday 2025, week 20: water quality at Sydney beaches.
#
# Downloads the Beachwatch water-quality and weather tables, averages them per
# Year/Month, and builds `p1`: one facet per year showing a temperature stripe
# per month, overlaid with monthly mean enterococci (points + stems) and
# monthly mean rainfall (line).

# Load the packages in ----------------------------------------------------
# Install whatever is missing, then attach with `library()` so that a failed
# install stops the script instead of silently carrying on.
cran_packages <- c(
  "tidyverse", "patchwork", "ggplot2", "scales", "RColorBrewer"
)
is_installed <- vapply(
  cran_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(cran_packages[!is_installed])
}
library(tidyverse)
library(patchwork)
library(ggplot2)
library(scales)
library(RColorBrewer)

# I keep all my styling in a custom package to tidy up my code and keep it
# consistent over time. `devtools::install()` takes a path, so this expects
# the package source in a `CustomGGPlot2Theme` directory under the working
# directory.
if (!requireNamespace("CustomGGPlot2Theme", quietly = TRUE)) {
  devtools::install("CustomGGPlot2Theme")
}
library(CustomGGPlot2Theme)

# Read the data -----------------------------------------------------------
water_quality <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-05-20/water_quality.csv"
)

# The weather table's coordinates are dropped before joining.
weather <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-05-20/weather.csv"
) %>%
  select(-c(longitude, latitude))

water_quality$date <- as.Date(water_quality$date)
weather$date <- as.Date(weather$date)

# Join and summarise ------------------------------------------------------
# Attach the weather to every water-quality sample taken on the same date,
# split the date into parts, then keep 2000 onwards and drop 2025.
# NOTE(review): year()/month()/day() are presumably lubridate's, attached by
# tidyverse (>= 2.0.0) -- confirm, or add library(lubridate).
combined <- left_join(water_quality, weather, by = "date") %>%
  mutate(
    Year = year(date),
    Month = month(date, label = TRUE),
    Day = day(date)
  ) %>%
  filter(Year >= 2000, Year != 2025)

# Monthly means of max temperature, enterococci count and precipitation.
stripes_data <- combined %>%
  group_by(Year, Month) %>%
  summarise(
    avg_temp = mean(max_temp_C, na.rm = TRUE),
    average_bacteria = mean(enterococci_cfu_100ml, na.rm = TRUE),
    average_rain = mean(precipitation_mm, na.rm = TRUE),
    .groups = "drop"
  )

# Range and mean of the monthly temperatures; they anchor the two ends and
# the centre of the fill palette below.
maxmin <- range(stripes_data$avg_temp, na.rm = TRUE)
md <- mean(stripes_data$avg_temp, na.rm = TRUE)

# Color palette: 11-step RdBu at 50% opacity.
col_strip <- brewer.pal(11, "RdBu")
col_strip <- adjustcolor(col_strip, alpha.f = 0.5)

# Numeric month index so the overlays can be positioned on the discrete
# Month axis.
stripes_data$Month_num <- as.numeric(stripes_data$Month)

# Original data ranges, needed to turn the rescaled [0.8, 1.2] band back into
# real units for the axis labels.
rain_range <- range(stripes_data$average_rain, na.rm = TRUE)
bac_range <- range(stripes_data$average_bacteria, na.rm = TRUE)

# Build the plot ----------------------------------------------------------
# Everything is drawn inside the band y = [0.8, 1.2]: the tiles are centred
# on y = 1 with height 0.4, and both overlays are rescaled into that band.
p1 <- ggplot(stripes_data, aes(x = Month, y = 1, fill = avg_temp)) +
  # Explicit tile height kept for visibility.
  geom_tile(height = 0.4) +
  # Reversed palette; `values` places the mean temperature at the centre of
  # the palette.
  scale_fill_gradientn(
    colors = rev(col_strip),
    values = scales::rescale(c(maxmin[1], md, maxmin[2])),
    na.value = "gray80"
  ) +
  # Bacteria as points. Mapping `color` to a constant string creates the
  # legend key "Average Enterococci per 100ml".
  geom_point(
    aes(
      x = Month_num,
      y = scales::rescale(average_bacteria, to = c(0.8, 1.2)),
      color = "Average Enterococci per 100ml"
    ),
    size = 0.5,
    group = 1
  ) +
  # Stem from the bottom of the band up to each bacteria point.
  # `linewidth` replaces `size`, which is deprecated for line geoms since
  # ggplot2 3.4.0.
  geom_segment(
    aes(
      x = Month_num,
      xend = Month_num,
      y = 0.8,
      yend = scales::rescale(average_bacteria, to = c(0.8, 1.2))
    ),
    color = "black",
    linewidth = 0.5
  ) +
  # Rainfall as a line. The constant `color` mapping creates the legend key
  # "Average Monthly Rainfall (mm)".
  geom_line(
    aes(
      x = Month_num,
      y = scales::rescale(average_rain, to = c(0.8, 1.2)),
      color = "Average Monthly Rainfall (mm)"
    ),
    linewidth = 0.5,
    group = 1
  ) +
  labs(
    title = str_wrap(
      "Relationship between Average Enterococci per 100ml, Average Temperature, and Average Rainfall",
      70
    ),
    caption = "Tidy Tuesday 2025 Week 20",
    x = "Month",
    y = NULL,
    fill = "Avg Temp (°C)"
  ) +
  # Shared look from the custom theme package.
  Custom_Style() +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.5),
    # Make facet label strips transparent.
    strip.background = element_rect(fill = "transparent", color = NA)
  ) +
  # Dual y-axis: left for rainfall, right for bacteria. Both invert the
  # rescaling into [0.8, 1.2] (band width 0.4) using the original ranges.
  scale_y_continuous(
    name = "Average Monthly Rainfall (mm)",
    limits = c(0.8, 1.2),
    breaks = seq(0.8, 1.2, length.out = 5),
    labels = function(x) {
      # Inverse transformation for rainfall.
      round((x - 0.8) / 0.4 * diff(rain_range) + rain_range[1], digits = 1)
    },
    # NOTE(review): ggplot2 >= 3.5.0 prefers `transform =` over `trans =`;
    # `trans` is kept so older versions still work -- confirm the installed
    # version before switching.
    sec.axis = sec_axis(
      trans = ~ (. - 0.8) / 0.4 * diff(bac_range) + bac_range[1],
      name = "Average Enterococci per 100ml"
    )
  ) +
  # Legend keys: a point only for bacteria, a line only for rainfall.
  scale_color_manual(
    name = "",
    values = c(
      "Average Enterococci per 100ml" = "black",
      "Average Monthly Rainfall (mm)" = "blue"
    ),
    guide = guide_legend(
      override.aes = list(
        linetype = c(0, 1),
        shape = c(16, NA)
      )
    )
  ) +
  facet_wrap(~Year)