Assessing the Impact of SFFA on Campus Diversity
STA 9750 Mini-Project #01
1 Introduction
In June 2023, the Supreme Court’s decision in Students for Fair Admissions (SFFA) v. Harvard ended race-conscious admissions programs at higher education institutions in the United States. This report analyzes the ruling’s effect on demographic shifts using data from the Integrated Postsecondary Education Data System (IPEDS). Through data summarization and exploratory analysis, I compare the diversity composition of institutions like Baruch College and the CUNY system to “control” systems like the California Public University system. The goal is to determine whether observed changes in diversity metrics post-SFFA reflect normal demographic variation or a structural shift in the admissions landscape.
2 Data Acquisition and Preparation
Data is sourced directly from the National Center for Education Statistics (NCES) IPEDS data center, covering institutional descriptions and enrollment records from 2010 to 2024.
View R Code
#' Acquire IPEDS Data for MP#01
#'
#' Downloads the IPEDS fall-enrollment (`EF{year}A`) and institutional
#' description (`HD{year}`) tables for every requested year, caches the
#' downloaded files under `data/mp01`, standardizes the column names, and
#' joins the two sources on institution and year.
#' Source: https://nces.ed.gov/ipeds/datacenter/DataFiles.aspx
#'
#' We're starting in 2010 as the data seems to be reasonably complete
#' after that point.
#'
#' @param start_year First survey year to acquire (default 2010).
#' @param end_year Last survey year to acquire (default 2024).
#' @return A data frame with one row per institution, year and `level`
#'   ("all undergrad", "all graduate", "first year undergrad"), the summed
#'   `enrollment_{m,f}_{group}` counts, and the institution's name and state.
#'   Rows without a matching description for the same year are dropped by the
#'   inner join.
acquire_ipeds_data <- function(start_year=2010, end_year=2024){
  data_dir <- file.path("data", "mp01")
  if (!dir.exists(data_dir)) {
    dir.create(data_dir, showWarnings = FALSE, recursive = TRUE)
  }

  # Build the NCES URL for one table. Years up to 2022 are fetched as static
  # zip files; later years go through the data-generator endpoint.
  ipeds_url <- function(table_name, yy) {
    if (yy <= 2022) {
      glue("https://nces.ed.gov/ipeds/datacenter/data/{table_name}.zip")
    } else {
      glue("https://nces.ed.gov/ipeds/data-generator?year={yy}&tableName={table_name}&HasRV=0&type=csv")
    }
  }

  # Download `url` to `dest` unless it is already cached; returns `dest`.
  # A failed download is removed so that the `file.exists()` check does not
  # mistake a partial file for a valid cache entry on the next run.
  fetch_cached <- function(url, dest, label, yy) {
    if (!file.exists(dest)) {
      message(glue("Downloading {label} for {yy} from {url}"))
      status <- tryCatch(
        download.file(url, destfile = dest, quiet = TRUE, mode = "wb"),
        error = function(e) {
          unlink(dest)
          stop(e)
        }
      )
      if (status != 0) {
        unlink(dest)
        stop("Download failed for ", url, " (status ", status, ")",
             call. = FALSE)
      }
    }
    dest
  }

  YEARS <- seq(start_year, end_year)

  EFA_ALL <- map(YEARS, function(yy) {
    ef_file <- fetch_cached(
      ipeds_url(glue("EF{yy}A"), yy),
      file.path(data_dir, glue("ef{yy}a.csv.zip")),
      "Enrollment Data",
      yy
    )

    read_csv(ef_file, show_col_types = FALSE) |>
      mutate(
        year = yy,
        # American Indian or Alaskan Native
        enrollment_m_aian = EFAIANM,
        enrollment_f_aian = EFAIANW,
        # Asian
        enrollment_m_asia = EFASIAM,
        enrollment_f_asia = EFASIAW,
        # Black or African-American
        enrollment_m_bkaa = EFBKAAM,
        enrollment_f_bkaa = EFBKAAW,
        # Hispanic
        enrollment_m_hisp = EFHISPM,
        enrollment_f_hisp = EFHISPW,
        # Native Hawaiian or Other Pacific Islander
        enrollment_m_nhpi = EFNHPIM,
        enrollment_f_nhpi = EFNHPIW,
        # White
        enrollment_m_whit = EFWHITM,
        enrollment_f_whit = EFWHITW,
        # Two or More Races
        enrollment_m_2mor = EF2MORM,
        enrollment_f_2mor = EF2MORW,
        # Unknown / Undisclosed Race
        enrollment_m_unkn = EFUNKNM,
        enrollment_f_unkn = EFUNKNW,
        # US Non-Resident
        enrollment_m_nral = EFNRALM,
        enrollment_f_nral = EFNRALW
      ) |>
      # Per 2024 Data Dictionary,
      #  - EFALEVEL 2  = undergrad
      #  - EFALEVEL 12 = grad
      #  - Line 1  = first year first time full-time undergrad
      #  - Line 15 = first year first time part-time undergrad
      filter((EFALEVEL %in% c(2, 12)) | (LINE %in% c(1, 15))) |>
      mutate(
        level = case_when(
          EFALEVEL == 2 ~ "all undergrad",
          EFALEVEL == 12 ~ "all graduate",
          LINE %in% c(1, 15) ~ "first year undergrad"
        )
      ) |>
      select(
        institution_id = UNITID,
        year,
        level,
        starts_with("enrollment_")
      ) |>
      # Lines 1 and 15 share the "first year undergrad" label, so summing
      # combines full-time and part-time first-year counts.
      group_by(institution_id, year, level) |>
      summarize(across(starts_with("enrollment_"), sum),
                .groups = "drop")
  }) |> bind_rows()

  DESC_ALL <- map(YEARS, function(yy) {
    hd_file <- fetch_cached(
      ipeds_url(glue("HD{yy}"), yy),
      file.path(data_dir, glue("hd{yy}.csv.zip")),
      "Institutional Descriptions",
      yy
    )

    # `yy` is a scalar, so a plain if/else selects the encoding.
    hd_encoding <- if (yy == 2024) "utf-8" else "windows-1252"

    suppressWarnings(
      read_csv(hd_file,
               show_col_types = FALSE,
               locale = locale(encoding = hd_encoding)) |>
        # NOTE(review): the bare `INSTNM` looks redundant; presumably it makes
        # the name column get read inside suppressWarnings() -- confirm before
        # removing.
        mutate(year = yy,
               INSTNM) |>
        select(institution_id = UNITID,
               institution_name = INSTNM,
               state = STABBR,
               year)
    )
  }) |> bind_rows()

  inner_join(EFA_ALL,
             DESC_ALL,
             join_by(institution_id, year))
}
IPEDS <- acquire_ipeds_data()View R Code
# 1. Flag rows by system membership, using the institution name only:
#    - is_cuny:      name contains "CUNY"
#    - is_calpublic: name contains "University of California" or
#                    "California State University"
IPEDS <- mutate(
  IPEDS,
  is_cuny = str_detect(institution_name, "CUNY"),
  is_calpublic = str_detect(institution_name, "University of California") |
    str_detect(institution_name, "California State University")
)
View R Code
# 2. Collect the distinct institution names flagged as California public,
#    so the name-matching rule can be checked by eye.
cal_schools_check <- distinct(
  filter(IPEDS, is_calpublic),
  institution_name
)
View R Code
# 3. Render the first ten matched names as a formatted gt table
gt(head(cal_schools_check, 10)) |>
  tab_header(
    title = "California Public University System Verification",
    subtitle = "Schools identified via name-matching logic"
  ) |>
  style_gt()
| California Public University System Verification |
| Schools identified via name-matching logic |
| institution_name |
|---|
| University of California Hastings College of Law |
| California State University-Bakersfield |
| California State University-Stanislaus |
| California State University-San Bernardino |
| California State University-Chico |
| California State University-Dominguez Hills |
| California State University-Fresno |
| California State University-Fullerton |
| California State University-East Bay |
| California State University-Long Beach |
3 Exploratory Analysis
The dataset contains 219,316 rows, representing a comprehensive view of institutional enrollment. The summary below confirms data integrity across the 15-year period.
View R Code
# 1. View the structure and data types
dplyr::glimpse(IPEDS)Rows: 219,316
Columns: 25
$ institution_id <dbl> 100654, 100654, 100654, 100663, 100663, 100663, 1006…
$ year <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010…
$ level <chr> "all graduate", "all undergrad", "first year undergr…
$ enrollment_m_aian <dbl> 2, 2, 0, 5, 13, 0, 0, 0, 0, 10, 47, 1, 1, 2, 0, 10, …
$ enrollment_f_aian <dbl> 0, 1, 0, 16, 22, 3, 0, 0, 0, 10, 36, 5, 0, 0, 0, 18,…
$ enrollment_m_asia <dbl> 4, 2, 0, 114, 210, 54, 0, 0, 0, 28, 98, 13, 1, 0, 0,…
$ enrollment_f_asia <dbl> 8, 7, 1, 191, 245, 55, 1, 3, 0, 24, 105, 16, 7, 1, 0…
$ enrollment_m_bkaa <dbl> 199, 2250, 527, 182, 889, 147, 59, 42, 0, 47, 323, 4…
$ enrollment_f_bkaa <dbl> 430, 2498, 555, 669, 2016, 260, 94, 85, 0, 72, 565, …
$ enrollment_m_hisp <dbl> 0, 9, 3, 37, 79, 14, 4, 9, 0, 10, 76, 10, 2, 7, 2, 5…
$ enrollment_f_hisp <dbl> 2, 7, 1, 82, 141, 14, 2, 3, 0, 6, 77, 14, 3, 12, 4, …
$ enrollment_m_nhpi <dbl> 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 7, 5…
$ enrollment_f_nhpi <dbl> 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 3, 3…
$ enrollment_m_whit <dbl> 47, 59, 7, 1713, 3081, 455, 90, 81, 3, 729, 2431, 27…
$ enrollment_f_whit <dbl> 125, 42, 9, 2886, 3601, 490, 67, 46, 1, 437, 1856, 1…
$ enrollment_m_2mor <dbl> 0, 0, 0, 16, 54, 18, 0, 0, 0, 1, 49, 3, 0, 1, 1, 19,…
$ enrollment_f_2mor <dbl> 0, 0, 0, 30, 75, 34, 0, 0, 0, 4, 40, 10, 0, 1, 0, 33…
$ enrollment_m_unkn <dbl> 0, 10, 9, 64, 179, 5, 23, 40, 0, 23, 47, 5, 1, 54, 3…
$ enrollment_f_unkn <dbl> 4, 10, 7, 92, 206, 7, 39, 61, 0, 10, 63, 7, 3, 71, 4…
$ enrollment_m_nral <dbl> 37, 23, 2, 242, 99, 5, 0, 0, 0, 134, 120, 12, 4, 12,…
$ enrollment_f_nral <dbl> 16, 20, 2, 176, 110, 8, 0, 0, 0, 64, 71, 1, 11, 13, …
$ institution_name <chr> "Alabama A & M University", "Alabama A & M Universit…
$ state <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL"…
$ is_cuny <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
$ is_calpublic <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
View R Code
# 2. Generate a high-level summary
skimr::skim(IPEDS)| Name | IPEDS |
| Number of rows | 219316 |
| Number of columns | 25 |
| _______________________ | |
| Column type frequency: | |
| character | 3 |
| logical | 2 |
| numeric | 20 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| level | 0 | 1 | 12 | 20 | 0 | 3 | 0 |
| institution_name | 0 | 1 | 3 | 105 | 0 | 11243 | 0 |
| state | 0 | 1 | 2 | 2 | 0 | 59 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| is_cuny | 0 | 1 | 0.00 | FAL: 218606, TRU: 710 |
| is_calpublic | 0 | 1 | 0.01 | FAL: 218126, TRU: 1190 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| institution_id | 0 | 1 | 274221.74 | 132743.57 | 100654 | 167039 | 216597 | 438586 | 500555 | ▇▇▁▂▇ |
| year | 0 | 1 | 2016.65 | 4.30 | 2010 | 2013 | 2016 | 2020 | 2024 | ▇▇▇▇▆ |
| enrollment_m_aian | 0 | 1 | 4.52 | 23.05 | 0 | 0 | 0 | 2 | 1498 | ▇▁▁▁▁ |
| enrollment_f_aian | 0 | 1 | 6.89 | 37.94 | 0 | 0 | 0 | 3 | 1719 | ▇▁▁▁▁ |
| enrollment_m_asia | 0 | 1 | 45.47 | 230.33 | 0 | 0 | 1 | 11 | 6541 | ▇▁▁▁▁ |
| enrollment_f_asia | 0 | 1 | 52.39 | 241.71 | 0 | 0 | 2 | 16 | 6378 | ▇▁▁▁▁ |
| enrollment_m_bkaa | 0 | 1 | 75.73 | 250.89 | 0 | 1 | 9 | 49 | 10401 | ▇▁▁▁▁ |
| enrollment_f_bkaa | 0 | 1 | 128.62 | 486.55 | 0 | 2 | 15 | 71 | 36035 | ▇▁▁▁▁ |
| enrollment_m_hisp | 0 | 1 | 120.28 | 532.11 | 0 | 1 | 7 | 48 | 19464 | ▇▁▁▁▁ |
| enrollment_f_hisp | 0 | 1 | 170.69 | 726.96 | 0 | 2 | 14 | 74 | 27342 | ▇▁▁▁▁ |
| enrollment_m_nhpi | 0 | 1 | 2.11 | 15.42 | 0 | 0 | 0 | 1 | 1354 | ▇▁▁▁▁ |
| enrollment_f_nhpi | 0 | 1 | 2.72 | 21.14 | 0 | 0 | 0 | 1 | 1556 | ▇▁▁▁▁ |
| enrollment_m_whit | 0 | 1 | 351.66 | 1041.29 | 0 | 2 | 33 | 237 | 34014 | ▇▁▁▁▁ |
| enrollment_f_whit | 0 | 1 | 450.90 | 1270.11 | 0 | 10 | 59 | 314 | 74462 | ▇▁▁▁▁ |
| enrollment_m_2mor | 0 | 1 | 21.76 | 75.40 | 0 | 0 | 1 | 12 | 5000 | ▇▁▁▁▁ |
| enrollment_f_2mor | 0 | 1 | 30.17 | 103.28 | 0 | 0 | 2 | 17 | 12146 | ▇▁▁▁▁ |
| enrollment_m_unkn | 0 | 1 | 36.47 | 226.13 | 0 | 0 | 1 | 16 | 25579 | ▇▁▁▁▁ |
| enrollment_f_unkn | 0 | 1 | 46.00 | 353.77 | 0 | 0 | 2 | 22 | 52081 | ▇▁▁▁▁ |
| enrollment_m_nral | 0 | 1 | 37.40 | 193.94 | 0 | 0 | 0 | 8 | 8181 | ▇▁▁▁▁ |
| enrollment_f_nral | 0 | 1 | 31.24 | 154.22 | 0 | 0 | 0 | 7 | 6339 | ▇▁▁▁▁ |
After an initial pass over the IPEDS dataset, I verified that the data structure is consistent with the instructor’s specifications. The dataset consists of 219,316 rows, representing a comprehensive view of institutional enrollment over 15 years. Using the skimr package, I observed that every column of the joined table is fully populated (n_missing = 0, complete_rate = 1), so no missing values need special handling at this stage. I nevertheless pass na.rm = TRUE in later aggregations, which tells R to ignore missing (NA) values, as a safeguard in case derived quantities such as ratios produce them. The data types are correctly identified, with institutional IDs stored as numeric and names as character strings, providing a good setup for further analysis.
4 Exploratory Questions
My initial exploration of the data revealed several key metrics regarding the landscape of higher education and Baruch College specifically. There are 8,983 distinct institutions recorded in this dataset. In 2024, Baruch College had 3,585 graduate students enrolled. When considering the entire student body, the total enrollment at Baruch in 2024 reached 20,081 students.
Additionally, in 2019, Western Governors University led the way with the highest number of enrolled female undergraduates at 62,737. Looking at 2024 data for incoming classes, University of Hawaii at Manoa admitted the highest proportion of Native Hawaiian or Pacific Islander first-year undergraduates among large institutions, representing 3.00% of their first-year class.
4.1 Institutional Trends and Comparisons
View R Code
# Top five states by graduate headcount, summed over every year in the data.
IPEDS_total |>
  filter(level == "all graduate") |>
  group_by(state) |>
  summarize(total_grad = sum(total_enrollment, na.rm = TRUE)) |>
  arrange(desc(total_grad)) |>
  slice_head(n = 5) |>
  gt() |>
  tab_header(
    title = "Top 5 States by Total Graduate Enrollment",
    subtitle = "Cumulative enrollment from 2010-2024"
  ) |>
  cols_label(state = "State", total_grad = "Total Students") |>
  fmt_number(columns = total_grad, decimals = 0) |>
  tab_source_note(
    "Source: IPEDS 2010–2024 via NCES. Cumulative headcount across all institutions."
  ) |>
  style_gt()
| Top 5 States by Total Graduate Enrollment | |
| Cumulative enrollment from 2010-2024 | |
| State | Total Students |
|---|---|
| CA | 4,475,614 |
| NY | 3,667,601 |
| TX | 2,911,827 |
| IL | 2,329,612 |
| PA | 2,135,193 |
| Source: IPEDS 2010–2024 via NCES. Cumulative headcount across all institutions. | |
View R Code
# Baruch undergraduate headcount by year with the year-over-year percent
# change.
IPEDS_total |>
  filter(
    str_detect(institution_name, "Baruch"),
    level == "all undergrad"
  ) |>
  # lag() compares each row with the previous one, so rows must be in
  # chronological order first.
  arrange(year) |>
  # The first year has no predecessor: lag() yields NA there, which
  # sub_missing() renders as a dash below.
  mutate(pct_change = total_enrollment / lag(total_enrollment) - 1) |>
  select(year, total_enrollment, pct_change) |>
  gt() |>
  tab_header(
    title = "Baruch College Undergraduate Enrollment Trends",
    subtitle = "Year-over-year change (2010–2024)"
  ) |>
  cols_label(
    year = "Year",
    total_enrollment = "Total Undergrads",
    pct_change = "% Change"
  ) |>
  fmt_number(columns = total_enrollment, decimals = 0) |>
  fmt_percent(columns = pct_change, decimals = 2) |>
  sub_missing(columns = pct_change, missing_text = "—") |>
  tab_source_note(
    "Source: IPEDS 2010–2024 via NCES. First year shown has no prior-year comparison."
  ) |>
  style_gt()
| Baruch College Undergraduate Enrollment Trends | ||
| Year-over-year change (2010–2024) | ||
| Year | Total Undergrads | % Change |
|---|---|---|
| 2010 | 13,120 | — |
| 2011 | 14,266 | 8.73% |
| 2012 | 13,777 | −3.43% |
| 2013 | 14,082 | 2.21% |
| 2014 | 14,857 | 5.50% |
| 2015 | 15,254 | 2.67% |
| 2016 | 15,210 | −0.29% |
| 2017 | 15,253 | 0.28% |
| 2018 | 15,024 | −1.50% |
| 2019 | 15,482 | 3.05% |
| 2020 | 15,774 | 1.89% |
| 2021 | 15,859 | 0.54% |
| 2022 | 15,896 | 0.23% |
| 2023 | 16,086 | 1.20% |
| 2024 | 16,496 | 2.55% |
| Source: IPEDS 2010–2024 via NCES. First year shown has no prior-year comparison. | ||
View R Code
# Stacked-area view of Baruch's undergraduate composition over time.
# "Other" is whatever share is left after the four named groups.
IPEDS_total |>
  filter(
    str_detect(institution_name, "Baruch"),
    level == "all undergrad"
  ) |>
  mutate(
    Asian    = (enrollment_m_asia + enrollment_f_asia) / total_enrollment,
    Black    = (enrollment_m_bkaa + enrollment_f_bkaa) / total_enrollment,
    Hispanic = (enrollment_m_hisp + enrollment_f_hisp) / total_enrollment,
    White    = (enrollment_m_whit + enrollment_f_whit) / total_enrollment,
    Other    = 1 - Asian - Black - Hispanic - White
  ) |>
  select(year, Asian, Black, Hispanic, White, Other) |>
  # One row per (year, group) so ggplot can stack the groups by fill.
  pivot_longer(-year, names_to = "race", values_to = "proportion") |>
  ggplot(aes(x = year, y = proportion, fill = race)) +
  geom_area(alpha = 0.85) +
  scale_x_continuous(breaks = seq(2010, 2024, 2)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Baruch College Undergraduate Racial Composition (2010–2024)",
    subtitle = "Share of total undergraduate enrollment by racial group",
    x = "Year",
    y = "Share of Enrollment",
    fill = "Group"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom")
View R Code
# Denominator: first-year undergraduates across every institution in 2024.
total_fy_2024 <- IPEDS_total |>
  filter(year == 2024, level == "first year undergrad") |>
  summarize(total = sum(total_enrollment, na.rm = TRUE)) |>
  pull(total)

# Each CUNY campus's first-year class as a share of that national total.
IPEDS_total |>
  filter(year == 2024, level == "first year undergrad", is_cuny) |>
  mutate(
    # "CUNY" is a prefix for most campuses but a suffix for
    # "College of Staten Island CUNY", so remove it wherever it appears and
    # tidy the leftover whitespace.
    institution_name = str_squish(str_remove(institution_name, "CUNY")),
    pct_of_us = total_enrollment / total_fy_2024
  ) |>
  select(institution_name, total_enrollment, pct_of_us) |>
  arrange(desc(total_enrollment)) |>
  gt() |>
  tab_header(
    title = "CUNY First-Year Enrollment Share (2024)",
    subtitle = "Each campus as a share of all U.S. first-year undergraduates"
  ) |>
  cols_label(
    institution_name = "Campus",
    total_enrollment = "First-Year Students",
    pct_of_us = "Share of U.S. Total"
  ) |>
  fmt_number(columns = total_enrollment, decimals = 0) |>
  fmt_percent(columns = pct_of_us, decimals = 3) |>
  tab_source_note(
    "Source: IPEDS 2024 via NCES. 'CUNY' prefix removed from campus names for readability."
  ) |>
  style_gt()
| CUNY First-Year Enrollment Share (2024) | ||
| Each campus as a share of all U.S. first-year undergraduates | ||
| Campus | First-Year Students | Share of U.S. Total |
|---|---|---|
| Borough of Manhattan Community College | 4,718 | 0.152% |
| New York City College of Technology | 3,585 | 0.115% |
| Hunter College | 2,897 | 0.093% |
| City College | 2,543 | 0.082% |
| Bernard M Baruch College | 2,532 | 0.081% |
| LaGuardia Community College | 2,406 | 0.077% |
| College of Staten Island CUNY | 2,196 | 0.071% |
| John Jay College of Criminal Justice | 2,128 | 0.068% |
| Queensborough Community College | 1,965 | 0.063% |
| Brooklyn College | 1,782 | 0.057% |
| Queens College | 1,751 | 0.056% |
| Kingsborough Community College | 1,615 | 0.052% |
| Lehman College | 1,545 | 0.050% |
| Bronx Community College | 1,252 | 0.040% |
| Hostos Community College | 844 | 0.027% |
| York College | 766 | 0.025% |
| Medgar Evers College | 586 | 0.019% |
| Stella and Charles Guttman Community College | 367 | 0.012% |
| Graduate School and University Center | 12 | 0.000% |
| Source: IPEDS 2024 via NCES. 'CUNY' prefix removed from campus names for readability. | ||
View R Code
# Horizontal bar chart of 2024 first-year enrollment for each CUNY campus.
IPEDS_total |>
  filter(year == 2024, level == "first year undergrad", is_cuny) |>
  # "CUNY" is a prefix for most campuses but a suffix for
  # "College of Staten Island CUNY", so remove it wherever it appears and
  # tidy the leftover whitespace.
  mutate(
    institution_name = str_squish(str_remove(institution_name, "CUNY"))
  ) |>
  # Sort ascending and freeze that order as factor levels, so the largest
  # campus ends up at the top of the y axis.
  arrange(total_enrollment) |>
  mutate(institution_name = fct_inorder(institution_name)) |>
  ggplot(aes(x = total_enrollment, y = institution_name)) +
  geom_col(fill = "#2c7bb6") +
  geom_text(aes(label = scales::comma(total_enrollment)),
            hjust = -0.1, size = 4) +
  # Extra room on the right so the value labels are not clipped.
  scale_x_continuous(labels = scales::comma,
                     expand = expansion(mult = c(0, 0.15))) +
  labs(title = "CUNY First-Year Enrollment by Campus (2024)",
       x = "First-Year Students", y = NULL) +
  theme_minimal(base_size = 14)
View R Code
# Five institutions whose white undergraduate share fell the most between
# 2010 and 2020, among rows with more than 1,000 undergraduates.
IPEDS_total |>
  filter(
    year %in% c(2010, 2020),
    level == "all undergrad",
    total_enrollment > 1000
  ) |>
  mutate(
    white_prop = (enrollment_m_whit + enrollment_f_whit) / total_enrollment
  ) |>
  select(institution_id, institution_name, year, white_prop) |>
  # One row per institution with the two years side by side.
  pivot_wider(
    id_cols = c(institution_id, institution_name),
    names_from = year,
    values_from = white_prop
  ) |>
  rename(white_prop_2010 = `2010`, white_prop_2020 = `2020`) |>
  # Keep only institutions that have a value in both years.
  filter(!is.na(white_prop_2010), !is.na(white_prop_2020)) |>
  mutate(diff = white_prop_2020 - white_prop_2010) |>
  arrange(diff) |>
  slice_head(n = 5) |>
  select(institution_name, white_prop_2010, white_prop_2020, diff) |>
  gt() |>
  tab_header(
    title = "Largest Decrease in White Student Fraction (2010–2020)",
    subtitle = "Limited to institutions with more than 1,000 undergraduates"
  ) |>
  cols_label(
    institution_name = "Institution",
    white_prop_2010 = "2010",
    white_prop_2020 = "2020",
    diff = "Change"
  ) |>
  fmt_percent(
    columns = c(white_prop_2010, white_prop_2020, diff),
    decimals = 2
  ) |>
  tab_source_note(
    "Source: IPEDS 2010 & 2020 via NCES. Change = 2020 minus 2010 white student share."
  ) |>
  style_gt()
| Largest Decrease in White Student Fraction (2010–2020) | |||
| Limited to institutions with more than 1,000 undergraduates | |||
| Institution | 2010 | 2020 | Change |
|---|---|---|---|
| University of the Cumberlands | 82.25% | 12.41% | −69.83% |
| Southeastern Community College | 85.56% | 22.04% | −63.52% |
| New England Institute of Technology | 66.97% | 7.92% | −59.05% |
| Saint Joseph's College of Maine | 70.73% | 19.71% | −51.02% |
| Montcalm Community College | 89.47% | 46.46% | −43.00% |
| Source: IPEDS 2010 & 2020 via NCES. Change = 2020 minus 2010 white student share. | |||
View R Code
# Three states with the largest rise in female undergraduate share between
# 2010 and 2024. The state figure is the plain mean of its institutions'
# shares, not an enrollment-weighted share.
IPEDS_total |>
  filter(year %in% c(2010, 2024), level == "all undergrad") |>
  mutate(
    # Sum every column whose name contains "_f_", then divide by headcount.
    female_prop = rowSums(across(contains("_f_")), na.rm = TRUE) /
      total_enrollment
  ) |>
  select(state, year, female_prop) |>
  group_by(state, year) |>
  summarize(
    female_prop = mean(female_prop, na.rm = TRUE),
    .groups = "drop"
  ) |>
  pivot_wider(names_from = year, values_from = female_prop) |>
  rename(prop_2010 = `2010`, prop_2024 = `2024`) |>
  # Keep only states that have a value in both years.
  filter(!is.na(prop_2010), !is.na(prop_2024)) |>
  mutate(diff = prop_2024 - prop_2010) |>
  arrange(desc(diff)) |>
  slice_head(n = 3) |>
  gt() |>
  tab_header(
    title = "States with Largest Increase in Female Undergraduates",
    subtitle = "Change in female share of undergraduate enrollment, 2010–2024"
  ) |>
  cols_label(
    state = "State",
    prop_2010 = "2010",
    prop_2024 = "2024",
    diff = "Change"
  ) |>
  fmt_percent(columns = c(prop_2010, prop_2024, diff), decimals = 2) |>
  tab_source_note("Source: IPEDS 2010 & 2024 via NCES.") |>
  style_gt()
| States with Largest Increase in Female Undergraduates | |||
| Change in female share of undergraduate enrollment, 2010–2024 | |||
| State | 2010 | 2024 | Change |
|---|---|---|---|
| FM | 53.72% | 62.87% | 9.15% |
| AS | 61.19% | 70.23% | 9.03% |
| UT | 69.10% | 74.41% | 5.31% |
| Source: IPEDS 2010 & 2024 via NCES. | |||
5 Final Insights and Deliverable
The chart below tracks Baruch’s minority diversity index from 2010 through the first post-SFFA admissions cycle.
View R Code
# Baruch's first-year diversity index by year, with the 2023 SFFA decision
# marked by a dashed red line.
diversity_data |>
  filter(
    str_detect(institution_name, "Baruch"),
    level == "first year undergrad"
  ) |>
  ggplot(aes(x = year, y = diversity_idx)) +
  geom_line(linewidth = 1.2, color = "#2c7bb6") +
  geom_point(size = 3, color = "#2c7bb6") +
  geom_vline(
    xintercept = 2023,
    linetype = "dashed",
    color = "red",
    linewidth = 0.8
  ) +
  # Label sits just to the right of the marker line.
  annotate(
    "text",
    x = 2023.2, y = 0.5,
    label = "SFFA Decision",
    hjust = 0,
    color = "red"
  ) +
  scale_x_continuous(breaks = seq(2010, 2024, 2)) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    limits = c(0, 1)
  ) +
  labs(
    title = "Baruch College: First-Year Minority Diversity Index",
    subtitle = "Fraction of incoming students identifying as Black, Hispanic, AIAN, NHPI, or Multiracial",
    x = "Year",
    y = "Diversity Index"
  ) +
  theme_minimal(base_size = 14)
When compared, Baruch’s and California public universities’ diversity indices show similar year-over-year fluctuations, suggesting that the observed changes may reflect normal demographic variation rather than a structural shift post-SFFA.
View R Code
# Yearly mean first-year diversity index for Baruch.
baruch_trend <- diversity_data |>
  filter(
    str_detect(institution_name, "Baruch"),
    level == "first year undergrad"
  ) |>
  group_by(year) |>
  summarize(diversity_idx = mean(diversity_idx, na.rm = TRUE)) |>
  mutate(system = "Baruch College")

# Same yearly mean, taken across all rows flagged as California public.
cal_trend <- diversity_data |>
  filter(is_calpublic, level == "first year undergrad") |>
  group_by(year) |>
  summarize(diversity_idx = mean(diversity_idx, na.rm = TRUE)) |>
  mutate(system = "California Public Universities")

# Overlay both series; colour and line type both encode the system.
bind_rows(baruch_trend, cal_trend) |>
  ggplot(aes(x = year, y = diversity_idx,
             color = system, linetype = system)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  geom_vline(
    xintercept = 2023,
    linetype = "dashed",
    color = "red",
    linewidth = 0.8
  ) +
  scale_x_continuous(breaks = seq(2010, 2024, 2)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_color_manual(
    values = c(
      "Baruch College" = "#2c7bb6",
      "California Public Universities" = "#d7191c"
    )
  ) +
  labs(
    title = "First-Year Diversity Index: Baruch vs. California",
    subtitle = "Red dashed line marks the 2023 SFFA Supreme Court decision",
    x = "Year",
    y = "Diversity Index",
    color = NULL,
    linetype = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom")
In completing this mini-project, I used Claude (Anthropic) and Gemini (Google) to assist with debugging R code errors, specifically regarding pivot_wider list-column issues, gt table styling, and Quarto rendering. All analytical framing, written text, and interpretations are my own.
This work ©2026 by Raúl J. Solá Navarro was prepared as a Mini-Project for
STA 9750 at Baruch College · MP #01 Instructions