Authors
Affiliations

Hannah Rosenthal

Albert Einstein College of Medicine

Vagish Hemmige

Montefiore Medical Center/ Albert Einstein College of Medicine

The code in this script generates tables for the manuscript.

Source code

The full R script is available at:

This R script file is itself reliant on the following helper files:

Plotting mean monthly costs

The following function uses ggplot2 to create a plot of monthly costs with a risk table underneath.

Its inputs are as follows:

  • fitted_model: the fit object from models created in the R/models.R script
  • countdf: a data frame containing the appropriate count data for cases and controls at each time point, used to create a risk table

The function uses the emmeans function from the package of the same name to estimate months costs. The x-axis (months) is shifted to the right so that the vertical line at x=0 cleanly separate pre-index and post-index months, and cases and controls are dodged slightly so that the error bars do not overlap.

Click to show/hide R Code
#Function that returns a ggplot of the mean cost (with confidence interval) for cryptococcus cases and controls by month
plot_mean_monthly_costs<-function(fitted_model, countdf=final_count_df) {

  #First, use emmeans to calculate the predicted values from the model assuming 30-day months, then convert to data frame
  emm_df <- emmeans(
    fitted_model,
    ~ patient_type | month,
    at = list(month_offset = 30),
    type = "response"
  )%>%as.data.frame()%>%
    
    #Shifts all points and error_bars 0.5 to the right to avoid the zero-line
    mutate(month=0.5+as.numeric(month))%>%
    
    #Move cases 0.1 to the right and controls 0.1 to the left to improve legibility
  mutate(month=ifelse(patient_type=="Case", month+0.1, month-0.1))%>%
    mutate(
      estimate = if ("response" %in% names(.)) response else emmean)
  
  monthly_plot <- list()
  
  # Main plot
  monthly_plot[["main"]] <- ggplot(data = emm_df) +
    geom_point(aes(x = month, y = estimate, color = patient_type)) +
    geom_errorbar(
      aes(
        x = month,
        ymin = asymp.LCL,
        ymax = asymp.UCL,
        color = patient_type
      ),
      width = 0.1
    ) +
    geom_vline(xintercept = 0,
               linetype = "dashed",
               color = "gray50") +
    theme_classic() +
    labs(
      x = "Month",
      y = "Inflation-adjusted cost per month (dollars)",
      color = "Patient group"
    )+
    theme(legend.position = "bottom")
  
  # Risk table
  monthly_plot[["risk_table"]] <- countdf %>%
    mutate(month = 0.5 + as.numeric(month),
           y_position = ifelse(patient_type == "Case", 0.4, 0.6)) %>%
    ggplot() +
    geom_text(
      aes(x = month, y = y_position, label = n, color=patient_type),
      size = 3
    )+
    coord_cartesian(ylim = c(0.3, 0.7)) +
    theme_void() +
    theme(
      plot.margin = margin(t = -5, b = 5),
      legend.position = "none")
  
  # Combine plots
  monthly_plot[["main"]] /
    monthly_plot[["risk_table"]] +
    plot_layout(heights = c(9, 1), guides = "keep")+
    plot_annotation(
      caption = "Bottom panel shows N at risk by month"
    )
  
}

Calculating the number of potential matches

The above function is passed a control df and a number of values for a case patient, and returns the number of distinct controls which are potential matches. The matching algorithm is hard-coded.

Click to show/hide R Code
#Function that defines the number of potential control matches for a case
calculate_number_potential_matches<-function(control_df,
                                             USRDS_ID,
                                             birthdate,
                                             cirrhosis_status,
                                             cmv_status,
                                             hiv_status,
                                             diabetes_status,
                                             matching_date,
                                             matching_days_since_transplant) {
  
  print(paste0("Calculating number of potential controls for patient: ", USRDS_ID))
  control_df%>%
    
    #Exact match on cirrhosis, CMV, HIV, and diabetes status
    filter(cirrhosis==cirrhosis_status)%>%
    #  filter(CMV==cmv_status)%>%
    filter(HIV==hiv_status)%>%
    filter(diabetes==diabetes_status)%>%
    
    #Risk set matching
    filter(tstart<=matching_days_since_transplant)%>%
    filter(tstop>matching_days_since_transplant)%>%
    
    #Calculate date for age calculations/etc.
    mutate(.baseline_control_date=matching_days_since_transplant+most_recent_transplant_date)%>%
    
    #Make sure case and control are sampled within 3 years of each other
    filter(abs(time_length(interval(.baseline_control_date, matching_date), "years")) <=3)%>%
    
    #Date matching
    #  filter(cohort_start_date<=matching_date)%>%
    #  filter(cohort_stop_date>matching_date)%>%
    
    #Age>=18 on index date
    filter(time_length(interval(BORN, .baseline_control_date), "years") >= 18)%>%
    
    #Age difference under 10 years, calculated at sampling date
    filter(abs(time_length(interval(BORN,.baseline_control_date), "years")-
                 time_length(interval(birthdate,matching_date), "years")) <=10)%>%
    
    #Confirming 365 day Medicare lookback available for potential match
    verify_medicare_primary(index_date = matching_date, medicare_coverage_df = medicare_history, cache=TRUE)%>%
    filter(medicare_primary_TF==TRUE)%>%
    
    #Count rows after ensuring controls with mult transplants are only counted once 
    distinct(USRDS_ID)%>%
    nrow()%>%
    return()
  
}

Total excess costs

This function takes a fit object from a model and calculates the total estimated excess costs and confidence intervals. This object currently only works for models with linear links.

Click to show/hide R Code
# Calculate total excess costs in a model
calculate_total_excess_costs <- function(fitted_model, link = "linear") {
  
  # Extract coefficients and vcov matrix
  coef_df <- tidy(fitted_model)
  vcov_matrix <- vcov(fitted_model)
  
  # Get interaction terms
  interaction_names <- coef_df %>% 
    filter(str_detect(term, ":")) %>% 
    pull(term)
  
  if (link == "linear") {
    # Sum of coefficients
    sum_estimate <- coef_df %>% 
      filter(term %in% interaction_names) %>% 
      pull(estimate) %>% 
      sum()
    
    # Variance of sum
    sum_variance <- vcov_matrix[interaction_names, interaction_names] %>% 
      sum()
    
    # Standard error and CI
    sum_se <- sqrt(sum_variance)
    df <- df.residual(fitted_model)
    
    tibble(
      estimate = sum_estimate,
      std_error = sum_se,
      conf_low = estimate - qt(0.975, df) * std_error,
      conf_high = estimate + qt(0.975, df) * std_error,
      statistic = estimate / std_error,
      p_value = 2 * pt(-abs(statistic), df)
    )
    
  } else if (link == "log") {
    # Get coefficients for interactions
    coefs <- coef(fitted_model)[interaction_names]
    vcov_sub <- vcov_matrix[interaction_names, interaction_names]
    
    # Exponentiate each coefficient
    exp_coefs <- exp(coefs)
    
    # Sum of exponentiated coefficients
    sum_estimate <- sum(exp_coefs)
    
    # Delta method for variance
    # Gradient: derivative of sum(exp(beta)) w.r.t. each beta is exp(beta)
    gradient <- exp_coefs
    
    # Variance using delta method
    sum_variance <- as.numeric(t(gradient) %*% vcov_sub %*% gradient)
    sum_se <- sqrt(sum_variance)
    
    # CI using normal approximation (GLM)
    tibble(
      estimate = sum_estimate,
      std_error = sum_se,
      conf_low = estimate - qnorm(0.975) * std_error,
      conf_high = estimate + qnorm(0.975) * std_error,
      statistic = estimate / std_error,
      p_value = 2 * pnorm(-abs(statistic))
    )
    
  } else {
    stop("link must be either 'linear' or 'log'")
  }
}

Other portions of the analysis

  • Setup: Defines global paths, data sources, cohort inclusion criteria, and analysis-wide constants.
  • Tables: Summary tables and regression outputs generated from the final models.
  • Figures:Visualizations of costs, risks, and model-based estimates.
  • About: methods, assumptions, and disclosures