TidyTuesday Week 8:

Academic Literature on Racial and Ethnic Disparities in Reproductive Medicine in the US
TidyTuesday
Data Visualization
R Programming
2025
Author
Peter Gray
Published
February 25, 2025
Display code
# gc()
# rm(list = ls())
# graphics.off()
# cat('\014')

if(!require(tidyverse)){install.packages("tidyverse"); library(tidyverse)}
if(!require(ggfortify)){install.packages("ggfortify"); library(ggfortify)}
if(!require(patchwork)){install.packages("patchwork"); library(patchwork)}
if(!require(sysfonts)){install.packages("sysfonts"); library(sysfonts)}
if(!require(showtext)){install.packages("showtext"); library(showtext)}
if(!require(RColorBrewer)){install.packages("RColorBrewer"); library(RColorBrewer)}
if(!require(maps)){install.packages("maps"); library(maps)}
if(!require(rcrossref)){install.packages("rcrossref"); library(rcrossref)}

wd <- getwd()


font_add_google("Roboto Mono", "roboto_mono")
font <- "roboto_mono"
showtext_auto()

# Color palette
color <- palette.colors(palette = "Okabe-Ito")
color <- append(color, "gold")
color[1] <- "#D41159"


Custom_Style <- function() {
  ggplot2::theme(
    plot.title = ggplot2::element_text(family=font,
                                       size=24,
                                       face="bold",
                                       color="#222222"),
    plot.subtitle = ggplot2::element_text(family=font,
                                          size=18,
                                          color="#222222"),
    plot.caption = ggplot2::element_text(family=font,
                                         size=10,
                                         color="#222222"),
    
    legend.position = "bottom",
    legend.title = ggplot2::element_text(family=font, 
                                         size=12, 
                                         face="bold", 
                                         color="#222222"),
    # legend.text.align = 0,
    legend.key = ggplot2::element_blank(),
    legend.text = ggplot2::element_text(family=font,
                                        size=9,
                                        color="#222222"),
    
    # Axis format
    axis.text = ggplot2::element_text(family = font,
                                      size=10,
                                      color="#222222"),
    axis.text.x = ggplot2::element_text(margin=ggplot2::margin(5, b = 10), size =8),
    axis.line = ggplot2::element_line(colour = alpha('#222222', 0.5), size =0.5),
    axis.title = ggplot2::element_text(family=font, 
                                       size=12, 
                                       face="bold", 
                                       color="#222222"),
    
    
    # Grid lines
    panel.grid.minor = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_blank(),
    
    
    
    # Very pale cream/yellow background
    panel.background = element_rect(fill = "#FFFBF0",  
                                    color = "#FFFBF0", 
                                    linewidth = 0.5, 
                                    linetype = "solid"),
    plot.background = element_rect(fill = "#FFFBF0",  
                                   color = "#FFFBF0", 
                                   linewidth = 0.5, 
                                   linetype = "solid"),
    legend.background = element_rect(fill = "#FFFBF0",  
                                     color = "#FFFBF0", 
                                     linewidth = 0.5, 
                                     linetype = "solid"),
    
    
  )
}


article_dat <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-25/article_dat.csv')
model_dat <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-25/model_dat.csv')

# Data Cleaning 

#Fucntion to simplify races

simplify_race <- function(race) {
  case_when(
    grepl("African American|Black|NonBlack, Non-Hispanic Black", race, ignore.case = TRUE) ~ "Black",
    grepl("White|Caucasian|Non-Hispanic White", race, ignore.case = TRUE) ~ "White",
    grepl("Asian", race, ignore.case = TRUE) ~ "Asian",
    grepl("Hispanic|Latino", race, ignore.case = TRUE) ~ "Hispanic",
    grepl("Native American|Alaskan", race, ignore.case = TRUE) ~ "Native American",
    grepl("Pacific Islander|Hawaiian", race, ignore.case = TRUE) ~ "Pacific Islander",
    grepl("Unknown|Other|Multiple", race, ignore.case = TRUE) ~ "Other",
    TRUE ~ NA
  )
}

# function to extract study title from doi using crossref package

get_study_info <- function(doi) {
  
  res <- cr_works(doi = doi)
  
  if (is.null(res$data)) {
    return(list(title = NA, first_author = NA))
  }
  
  title <- res$data$title
  
  authors <- res$data$author
  first_author <- if(!is.null(authors) && length(authors) > 0) {
    
    authors[[1]]$family
    
  } else {
    
    NA
    
  }
  
  doi <-res$data$doi
  
  return(list(title = title, first_author = first_author, doi = doi))
  
}

# Publications mentioning Race --------------------------------------------


race_count <- article_dat %>% 
  mutate(across(starts_with(c("race", "eth")), simplify_race)) %>% 
  rowwise() %>%
  group_by(across(starts_with(c("race", "eth")))) %>% 
  summarise() %>%
  pivot_longer(cols = everything(), values_to = "race") %>% 
  filter(!is.na(race) & race != "NA") %>% 
  count(race, sort = TRUE)  

p1 <- race_count %>% 
  ggplot(aes(x = reorder(race, n), y = n, fill = race)) +  
  geom_bar(stat = "identity") +  
  geom_text(aes(label = paste0("n = ", n), fontface = "bold"), hjust = -0.1, size = 3, angle = 0) +  # Add text labels on top of bars
  labs(subtitle = str_wrap("Race Count Distribution of Reproductive Rights studies in the US (2010-2023)", 40),
       x = "Race",
       y = "Count",
       fill = "Race") +
  Custom_Style() +
  coord_flip()  





race_year_count <- article_dat %>%
  mutate(across(starts_with(c("race", "eth")), simplify_race)) %>%
  pivot_longer(cols = starts_with(c("race", "eth")), values_to = "race", names_to = "race_column") %>%
  filter(!is.na(race) & race != "NA" & race != "") %>%
  group_by(year, race) %>%
  summarise(n = n(), .groups = 'drop') %>% 
  group_by(year) %>% 
  mutate(percent = n / sum(n)) %>% 
  arrange(desc(percent)) %>% 
  ungroup()



p2 <- race_year_count %>% 
  ggplot(aes(x = year, y = percent, fill = race)) +  
  geom_col() +
  geom_text(aes(label = scales::percent(percent, accuracy = 1), fontface = "bold"),  
            position = position_stack(vjust = 0.5),  
            size = 3,  
            family = font,
            color = "black") +  
  labs(subtitle = str_wrap("Proportion of Articles related to Reproductive rights mentioning each race (2010-2023)", 40),
       x = "Race",
       y = "Percent", fill ="Race") +  
  Custom_Style() +  
  scale_y_continuous(labels = scales::percent_format())


# Combined Plot


# Clean the data


data <- model_dat %>%
  filter(
    subanalysis == "No",
    measure == "OR",          # Ensures reference group includes 'White'
    !grepl("none", covariates, ignore.case = TRUE),     # Filters out rows where covariates is 'none'
    outcome == "severe maternal morbidity",
    model_number == 5
  ) %>% 
  mutate(doi = factor(doi))

doi <- unique(factor(data$doi))
d = 1


data_i <- data %>% 
  filter(compare != "Unknown") 


data_i <- data_i %>%
  rowwise() %>%
  mutate(info = list(get_study_info(doi)),
         citation = paste0(info$first_author[[1]], " et. al: ", info$doi)) %>% 
  select(-info) 

measure <- unique(data_i$measure)
endpoint <- unique(data_i$outcome)
study_title <- unique(data_i$citation)



p3 <- ggplot(data_i, aes(x = point, y = reorder(compare, point))) +  
  geom_point(size = 3, aes(color = compare)) +  # Points for estimates
  geom_errorbarh(aes(xmin = lower, xmax = upper, color = compare), height = 0.2) +  # Confidence intervals
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") +  # Reference line at 1
  labs(subtitle = paste0('Forest Plot: ', str_to_title(endpoint), " compared with White Women"),
       x = paste0(measure, " (95% CI)"),
       y = "Comparison Group",
       color = "Race", 
       caption = paste("Doi: ", study_title, collapse = "\n")
  ) +
  annotate("text", x = 0.5, y = length(unique(data_i$compare)) + 0.5, label = "Favourable compared with White Women", color = "green", hjust = 0) +
  annotate("text", x = 1.5, y = length(unique(data_i$compare)) + 0.5, label = "Outcomes Worsened compared with White Women", color = "red", hjust = 1) +
  facet_wrap(~ paste(citation, sep = ": "), labeller = labeller(group = function(x) str_wrap(x, width = 20))) +
  Custom_Style() +
  theme(
    strip.background = element_rect(fill = "#FFFBF0", color = "#FFFBF0"), # Green background with white border
    strip.text = element_text(size = 8, face = "bold", color = "black", family = font, hjust = 0) # White bold text
  )



# Combine plots with legend between p1/p2 and p3
combined_plot <- (p1 + p2) / 
  p3 +
  plot_layout(
    guides = "collect"
  ) &
  theme(legend.position = "bottom") &
  plot_annotation(
    title = str_wrap('Academic Literature on Racial and Ethnic Disparities in Reproductive Medicine in the US', 80),
    subtitle = "TidyTuesday: Week 8, 2025",
    theme = Custom_Style()
  ) &
  theme(
    caption = element_text(hjust = 0.5),
    plot.subtitle = element_text(size = 16)
  )