A few days ago, as I was applying for jobs, I wanted to have a fresh project that I could show off, and I thought abortions!! This data is from before the supreme court’s Dobbs decision and it doesn’t include complications or death from restrictions to reproductive health, so unfortunately I can’t show the ties between those two things. It does show a general overview of trends by age group and states over time. I’m actually proud of this, I did a lot in a few hours. I even got to making some custom functions from subsetting data to making maps. Then I ended up making it into a dashboard, ‘cause of course!! If you want that though, you’ll have to go to the codeberg link below… Anyway, here’s what I wrote and I’m just gonna show some visualizations; specifically, data in raw numbers, and not rates (cases per 1,000 birthing people) or ratios (cases per 1,000 births).
Load libraries
library(tidyverse)
library(usmap)
NationalAndStatePregnancy <- read_csv("NationalAndStatePregnancy_PublicUse.csv")
NSP <- tibble(NationalAndStatePregnancy) %>%
select(!c(notes, versiondate, populationsource)) %>%
pivot_longer(3:101, names_to = "names", values_to = "values") %>%
mutate(category = case_when(
str_detect(names, "pregnancy|pregnancies") ~ "pregnancy",
str_detect(names, "abortion") ~ "abortion",
str_detect(names, "birth") ~ "birth",
str_detect(names, "miscarriage") ~ "miscarriage",
str_detect(names, "population") ~ "population",
TRUE ~ "other"
)) %>%
relocate(category, .before = names) %>%
mutate(subcategory = case_when(
str_detect(names, "total") ~ "total",
str_detect(names, "rate") ~ "rate",
str_detect(names, "ratio") ~ "ratio",
TRUE ~ "number"
)) %>%
relocate(subcategory, .after = category) %>%
mutate(names = case_when(
str_detect(names, "lt15") ~ "14 and Younger",
str_detect(names, "1517") ~ "15 to 17",
str_detect(names, "1819") ~ "18 to 19",
str_detect(names, "1519") ~ "15 to 19",
str_detect(names, "lt20") ~ "19 and Younger",
str_detect(names, "2024") ~ "20 to 24",
str_detect(names, "2529") ~ "25 to 29",
str_detect(names, "3034") ~ "30 to 34",
str_detect(names, "3539") ~ "35 to 39",
str_detect(names, "40plus") ~ "40 and Older",
str_detect(names, "1544") ~ "15 to 44",
TRUE ~ names
))
Create function to subset data by state, category (pregnancy, abortion, birth, miscarriage, population) and subcategory (number, rate, ratio). Then create functions to visualize the data for each subcaregory.
subdata <- function(sta, cat, sub) {
subNSP <- NSP %>%
filter(state == sta,
category == cat,
subcategory == sub) %>%
filter(!(names %in% c("15 to 19", "15 to 44", "19 and Younger"))) %>%
select(!c(state, category, subcategory))
return(subNSP)
}
numberviz <- function(dat, tit) {
vizNSP <- dat %>%
ggplot(aes(x = year, y = values, fill = names)) +
geom_col() +
scale_y_continuous(labels = scales::label_number(suffix = "M", scale = 1e-6)) +
ggtitle(paste("U.S.", tit, "by Age Group and Year", sep = " ")) +
labs(subtitle = "1973 to 2020",
x = "Year", y = tit, fill = "Age") +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(vizNSP)
}
rateviz <- function(dat, tit) {
vizNSP <- dat %>%
ggplot(aes(x = year, y = values, color = names)) +
geom_line() +
geom_smooth() +
ggtitle(paste("U.S.", tit, "by Age Group and Year", sep = " ")) +
labs(subtitle = "1973 to 2020",
x = "Year", y = tit, color = "Age") +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(vizNSP)
}
ratioviz <- function(dat) {
vizNSP <- dat %>%
ggplot(aes(x = year, y = names, fill = values)) +
geom_tile() +
ggtitle("U.S. Abortions per 1,000 Births by Age Group and Year") +
labs(subtitle = "1973 to 2020",
x = "Year", y = "Age", fill = "Ratio") +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(vizNSP)
}
Call the functions
NSP_US_POP <- subdata("US", "population", "number")
NSP_US_POP_VIZ <- numberviz(NSP_US_POP, "Population")
NSP_US_Pregnancy <- subdata("US", "pregnancy", "number")
NSP_US_Pregnancy_VIZ <- numberviz(NSP_US_Pregnancy, "Pregnancies")
NSP_US_Pregnancy_Rate <- subdata("US", "pregnancy", "rate")
NSP_US_Pregnancy_Rate_VIZ <- rateviz(NSP_US_Pregnancy_Rate, "Pregnancy Rates")
NSP_US_Abortion <- subdata("US", "abortion", "number")
NSP_US_Abortion_VIZ <- numberviz(NSP_US_Abortion, "Abortions")
NSP_US_Abortion_Rate <- subdata("US", "abortion", "rate")
NSP_US_Abortion_Rate_VIZ <- rateviz(NSP_US_Abortion_Rate, "Abortion Rates")
NSP_US_Abortion_Ratio <- subdata("US", "abortion", "ratio")
NSP_US_Abortion_Ratio_VIZ <- ratioviz(NSP_US_Abortion_Ratio)
NSP_US_Birth <- subdata("US", "birth", "number")
NSP_US_Birth_VIZ <- numberviz(NSP_US_Birth, "Births")
NSP_US_Birth_Rate <- subdata("US", "birth", "rate")
NSP_US_Birth_Rate_VIZ <- rateviz(NSP_US_Birth_Rate, "Birth Rates")
NSP_US_Miscarriage <- subdata("US", "miscarriage", "number")
NSP_US_Miscarriage_VIZ <- numberviz(NSP_US_Miscarriage, "Miscarriages")
Get a map of the US and create functions to join the map with the data and visualize it
map <- us_map() %>%
rename(state = abbr)
mapdata <- function(yea, cat, sub) {
subNSP <- NSP %>%
filter(state != c("US"),
year == yea,
category == cat,
subcategory == "total") %>%
select(!c(category, subcategory))
if (sub == "number") {
subNSP <- subNSP[!grepl("rate|ratio", subNSP$names), ]
} else if (sub == "rate") {
subNSP <- subNSP[!grepl("ratio|abortions|births|miscarriages|pregnancies", subNSP$names), ]
} else if (sub == "ratio") {
subNSP <- subNSP[!grepl("rate|abortions|births|miscarriages|pregnancies", subNSP$names), ]
}
subMap <- left_join(map, subNSP, by = "state") %>%
select(!c(year, names, fips, full)) %>%
relocate(values, .after = state)
return(subMap)
}
mapviz <- function(dat, tit, yea) {
viz <- ggplot(dat) +
geom_sf(aes(fill = values)) +
scale_fill_continuous(labels = function(x) {
ifelse(x >= 1000, paste0(round(x/1000), "k"), x)
}) +
ggtitle(paste("U.S.", tit, yea, sep = " ")) +
labs(fill = tit) +
theme_void() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.title = element_text(hjust = 0.5))
return(viz)
}
Call the function
NSP_US_Pregnancy_Total <- mapdata(2020, "pregnancy", "number")
NSP_US_Pregnancy_Total_VIZ <- mapviz(NSP_US_Pregnancy_Total, "Pregnancies", 2020)
NSP_US_Pregnancy_Rate_Total <- mapdata(2020, "pregnancy", "rate")
NSP_US_Pregnancy_Rate_Total_VIZ <- mapviz(NSP_US_Pregnancy_Rate_Total, "Pregnancy Rate", 2020) +
labs(subtitle = "per 1,000 Birthing People")
NSP_US_Abortion_Total <- mapdata(2020, "abortion", "number")
NSP_US_Abortion_Total_VIZ <- mapviz(NSP_US_Abortion_Total, "Abortions", 2020)
NSP_US_Abortion_Rate_Total <- mapdata(2020, "abortion", "rate")
NSP_US_Abortion_Rate_Total_VIZ <- mapviz(NSP_US_Abortion_Rate_Total, "Abortion Rate", 2020) +
labs(subtitle = "per 1,000 Birthing People")
NSP_US_Abortion_Ratio_Total <- mapdata(2020, "abortion", "ratio")
NSP_US_Abortion_Ratio_Total_VIZ <- mapviz(NSP_US_Abortion_Ratio_Total, "Abortion Ratio", 2020) +
labs(subtitle = "per 1,000 Births")
NSP_US_Birth_Total <- mapdata(2020, "birth", "number")
NSP_US_Birth_Total_VIZ <- mapviz(NSP_US_Birth_Total, "Births", 2020)
NSP_US_Birth_Rate_Total <- mapdata(2020, "birth", "rate")
NSP_US_Birth_Rate_Total_VIZ <- mapviz(NSP_US_Birth_Rate_Total, "Birth Rate", 2020) +
labs(subtitle = "per 1,000 Birthing People")
NSP_US_Miscarriage_Total <- mapdata(2020, "miscarriage", "number")
NSP_US_Miscarriage_Total_VIZ <- mapviz(NSP_US_Miscarriage_Total, "Miscarriages", 2020)
Citations
Maddow-Zimet I, Kost K and Finn S, Pregnancies, Births and Abortions in the United States: National and State Trends by Age, New York: Guttmacher Institute, 2020. Data set [versiondate] retrieved from osf.io/kthnf