The following function uses ggplot2 to create a plot of monthly costs with a risk table underneath.
Its inputs are as follows:
fitted_model: the fit object from models created in the R/models.R script
countdf: a data frame containing the appropriate count data for cases and controls at each time point, used to create a risk table
The function uses the emmeans function from the package of the same name to estimate months costs. The x-axis (months) is shifted to the right so that the vertical line at x=0 cleanly separate pre-index and post-index months, and cases and controls are dodged slightly so that the error bars do not overlap.
Click to show/hide R Code
#Function that returns a ggplot of the mean cost (with confidence interval) for cryptococcus cases and controls by monthplot_mean_monthly_costs<-function(fitted_model, countdf=final_count_df) {#First, use emmeans to calculate the predicted values from the model assuming 30-day months, then convert to data frame emm_df <-emmeans( fitted_model,~ patient_type | month,at =list(month_offset =30),type ="response" )%>%as.data.frame()%>%#Shifts all points and error_bars 0.5 to the right to avoid the zero-linemutate(month=0.5+as.numeric(month))%>%#Move cases 0.1 to the right and controls 0.1 to the left to improve legibilitymutate(month=ifelse(patient_type=="Case", month+0.1, month-0.1))%>%mutate(estimate =if ("response"%in%names(.)) response else emmean) monthly_plot <-list()# Main plot monthly_plot[["main"]] <-ggplot(data = emm_df) +geom_point(aes(x = month, y = estimate, color = patient_type)) +geom_errorbar(aes(x = month,ymin = asymp.LCL,ymax = asymp.UCL,color = patient_type ),width =0.1 ) +geom_vline(xintercept =0,linetype ="dashed",color ="gray50") +theme_classic() +labs(x ="Month",y ="Inflation-adjusted cost per month (dollars)",color ="Patient group" )+theme(legend.position ="bottom")# Risk table monthly_plot[["risk_table"]] <- countdf %>%mutate(month =0.5+as.numeric(month),y_position =ifelse(patient_type =="Case", 0.4, 0.6)) %>%ggplot() +geom_text(aes(x = month, y = y_position, label = n, color=patient_type),size =3 )+coord_cartesian(ylim =c(0.3, 0.7)) +theme_void() +theme(plot.margin =margin(t =-5, b =5),legend.position ="none")# Combine plots monthly_plot[["main"]] / monthly_plot[["risk_table"]] +plot_layout(heights =c(9, 1), guides ="keep")+plot_annotation(caption ="Bottom panel shows N at risk by month" )}
Calculating the number of potential matches
The above function is passed a control df and a number of values for a case patient, and returns the number of distinct controls which are potential matches. The matching algorithm is hard-coded.
Click to show/hide R Code
#Function that defines the number of potential control matches for a casecalculate_number_potential_matches<-function(control_df, USRDS_ID, birthdate, cirrhosis_status, cmv_status, hiv_status, diabetes_status, matching_date, matching_days_since_transplant) {print(paste0("Calculating number of potential controls for patient: ", USRDS_ID)) control_df%>%#Exact match on cirrhosis, CMV, HIV, and diabetes statusfilter(cirrhosis==cirrhosis_status)%>%# filter(CMV==cmv_status)%>%filter(HIV==hiv_status)%>%filter(diabetes==diabetes_status)%>%#Risk set matchingfilter(tstart<=matching_days_since_transplant)%>%filter(tstop>matching_days_since_transplant)%>%#Calculate date for age calculations/etc.mutate(.baseline_control_date=matching_days_since_transplant+most_recent_transplant_date)%>%#Make sure case and control are sampled within 3 years of each otherfilter(abs(time_length(interval(.baseline_control_date, matching_date), "years")) <=3)%>%#Date matching# filter(cohort_start_date<=matching_date)%>%# filter(cohort_stop_date>matching_date)%>%#Age>=18 on index datefilter(time_length(interval(BORN, .baseline_control_date), "years") >=18)%>%#Age difference under 10 years, calculated at sampling datefilter(abs(time_length(interval(BORN,.baseline_control_date), "years")-time_length(interval(birthdate,matching_date), "years")) <=10)%>%#Confirming 365 day Medicare lookback available for potential matchverify_medicare_primary(index_date = matching_date, medicare_coverage_df = medicare_history, cache=TRUE)%>%filter(medicare_primary_TF==TRUE)%>%#Count rows after ensuring controls with mult transplants are only counted once distinct(USRDS_ID)%>%nrow()%>%return()}
Total excess costs
This function takes a fit object from a model and calculates the total estimated excess costs and confidence intervals. This object currently only works for models with linear links.
Click to show/hide R Code
# Calculate total excess costs in a modelcalculate_total_excess_costs <-function(fitted_model, link ="linear") {# Extract coefficients and vcov matrix coef_df <-tidy(fitted_model) vcov_matrix <-vcov(fitted_model)# Get interaction terms interaction_names <- coef_df %>%filter(str_detect(term, ":")) %>%pull(term)if (link =="linear") {# Sum of coefficients sum_estimate <- coef_df %>%filter(term %in% interaction_names) %>%pull(estimate) %>%sum()# Variance of sum sum_variance <- vcov_matrix[interaction_names, interaction_names] %>%sum()# Standard error and CI sum_se <-sqrt(sum_variance) df <-df.residual(fitted_model)tibble(estimate = sum_estimate,std_error = sum_se,conf_low = estimate -qt(0.975, df) * std_error,conf_high = estimate +qt(0.975, df) * std_error,statistic = estimate / std_error,p_value =2*pt(-abs(statistic), df) ) } elseif (link =="log") {# Get coefficients for interactions coefs <-coef(fitted_model)[interaction_names] vcov_sub <- vcov_matrix[interaction_names, interaction_names]# Exponentiate each coefficient exp_coefs <-exp(coefs)# Sum of exponentiated coefficients sum_estimate <-sum(exp_coefs)# Delta method for variance# Gradient: derivative of sum(exp(beta)) w.r.t. each beta is exp(beta) gradient <- exp_coefs# Variance using delta method sum_variance <-as.numeric(t(gradient) %*% vcov_sub %*% gradient) sum_se <-sqrt(sum_variance)# CI using normal approximation (GLM)tibble(estimate = sum_estimate,std_error = sum_se,conf_low = estimate -qnorm(0.975) * std_error,conf_high = estimate +qnorm(0.975) * std_error,statistic = estimate / std_error,p_value =2*pnorm(-abs(statistic)) ) } else {stop("link must be either 'linear' or 'log'") }}
Other portions of the analysis
Setup: Defines global paths, data sources, cohort inclusion criteria, and analysis-wide constants.
Tables: Summary tables and regression outputs generated from the final models.
Figures:Visualizations of costs, risks, and model-based estimates.
---title: "Functions"format: html---The code in this script generates tables for the manuscript.::: Rcode### Source codeThe full R script is available at:- [`R/functions.R`](https://github.com/VagishHemmige/Cryptococcus-cost-analysis/blob/master/R/functions.R)This R script file is itself reliant on the following helper files:- [`R/setup.R`](https://github.com/VagishHemmige/Cryptococcus-cost-analysis/blob/master/R/setup.R):::## Plotting mean monthly costsThe following function uses `ggplot2` to create a plot of monthly costs with a risk table underneath.Its inputs are as follows:- `fitted_model`: the `fit` object from models created in the `R/models.R` script- `countdf`: a data frame containing the appropriate count data for cases and controls at each time point, usedto create a risk tableThe function uses the `emmeans` function from the package of the same name to estimate months costs. The x-axis (months) is shifted to the right so that the vertical line at x=0 cleanly separate pre-index and post-index months, and cases and controls are dodged slightly so that the error bars do not overlap.```{r, eval=FALSE}#Function that returns a ggplot of the mean cost (with confidence interval) for cryptococcus cases and controls by monthplot_mean_monthly_costs<-function(fitted_model, countdf=final_count_df) { #First, use emmeans to calculate the predicted values from the model assuming 30-day months, then convert to data frame emm_df <- emmeans( fitted_model, ~ patient_type | month, at = list(month_offset = 30), type = "response" )%>%as.data.frame()%>% #Shifts all points and error_bars 0.5 to the right to avoid the zero-line mutate(month=0.5+as.numeric(month))%>% #Move cases 0.1 to the right and controls 0.1 to the left to improve legibility mutate(month=ifelse(patient_type=="Case", month+0.1, month-0.1))%>% mutate( estimate = if ("response" %in% names(.)) response else emmean) monthly_plot <- list() # Main plot monthly_plot[["main"]] <- ggplot(data = emm_df) + geom_point(aes(x = month, y = estimate, color = patient_type)) + geom_errorbar( aes( x = month, ymin = asymp.LCL, ymax = asymp.UCL, color = patient_type ), width = 0.1 ) + geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") + theme_classic() + labs( x = "Month", y = "Inflation-adjusted cost per month (dollars)", color = "Patient group" )+ theme(legend.position = "bottom") # Risk table monthly_plot[["risk_table"]] <- countdf %>% mutate(month = 0.5 + as.numeric(month), y_position = ifelse(patient_type == "Case", 0.4, 0.6)) %>% ggplot() + geom_text( aes(x = month, y = y_position, label = n, color=patient_type), size = 3 )+ coord_cartesian(ylim = c(0.3, 0.7)) + theme_void() + theme( plot.margin = margin(t = -5, b = 5), legend.position = "none") # Combine plots monthly_plot[["main"]] / monthly_plot[["risk_table"]] + plot_layout(heights = c(9, 1), guides = "keep")+ plot_annotation( caption = "Bottom panel shows N at risk by month" )}```## Calculating the number of potential matchesThe above function is passed a control df and a number of values for a case patient, and returns the number of distinct controls which are potential matches.The matching algorithm is hard-coded.```{r, eval=FALSE}#Function that defines the number of potential control matches for a casecalculate_number_potential_matches<-function(control_df, USRDS_ID, birthdate, cirrhosis_status, cmv_status, hiv_status, diabetes_status, matching_date, matching_days_since_transplant) { print(paste0("Calculating number of potential controls for patient: ", USRDS_ID)) control_df%>% #Exact match on cirrhosis, CMV, HIV, and diabetes status filter(cirrhosis==cirrhosis_status)%>% # filter(CMV==cmv_status)%>% filter(HIV==hiv_status)%>% filter(diabetes==diabetes_status)%>% #Risk set matching filter(tstart<=matching_days_since_transplant)%>% filter(tstop>matching_days_since_transplant)%>% #Calculate date for age calculations/etc. mutate(.baseline_control_date=matching_days_since_transplant+most_recent_transplant_date)%>% #Make sure case and control are sampled within 3 years of each other filter(abs(time_length(interval(.baseline_control_date, matching_date), "years")) <=3)%>% #Date matching # filter(cohort_start_date<=matching_date)%>% # filter(cohort_stop_date>matching_date)%>% #Age>=18 on index date filter(time_length(interval(BORN, .baseline_control_date), "years") >= 18)%>% #Age difference under 10 years, calculated at sampling date filter(abs(time_length(interval(BORN,.baseline_control_date), "years")- time_length(interval(birthdate,matching_date), "years")) <=10)%>% #Confirming 365 day Medicare lookback available for potential match verify_medicare_primary(index_date = matching_date, medicare_coverage_df = medicare_history, cache=TRUE)%>% filter(medicare_primary_TF==TRUE)%>% #Count rows after ensuring controls with mult transplants are only counted once distinct(USRDS_ID)%>% nrow()%>% return()}```## Total excess costsThis function takes a `fit` object from a model and calculates the total estimated excess costs and confidence intervals.This object currently only works for models with linear links.```{r, eval=FALSE}# Calculate total excess costs in a modelcalculate_total_excess_costs <- function(fitted_model, link = "linear") { # Extract coefficients and vcov matrix coef_df <- tidy(fitted_model) vcov_matrix <- vcov(fitted_model) # Get interaction terms interaction_names <- coef_df %>% filter(str_detect(term, ":")) %>% pull(term) if (link == "linear") { # Sum of coefficients sum_estimate <- coef_df %>% filter(term %in% interaction_names) %>% pull(estimate) %>% sum() # Variance of sum sum_variance <- vcov_matrix[interaction_names, interaction_names] %>% sum() # Standard error and CI sum_se <- sqrt(sum_variance) df <- df.residual(fitted_model) tibble( estimate = sum_estimate, std_error = sum_se, conf_low = estimate - qt(0.975, df) * std_error, conf_high = estimate + qt(0.975, df) * std_error, statistic = estimate / std_error, p_value = 2 * pt(-abs(statistic), df) ) } else if (link == "log") { # Get coefficients for interactions coefs <- coef(fitted_model)[interaction_names] vcov_sub <- vcov_matrix[interaction_names, interaction_names] # Exponentiate each coefficient exp_coefs <- exp(coefs) # Sum of exponentiated coefficients sum_estimate <- sum(exp_coefs) # Delta method for variance # Gradient: derivative of sum(exp(beta)) w.r.t. each beta is exp(beta) gradient <- exp_coefs # Variance using delta method sum_variance <- as.numeric(t(gradient) %*% vcov_sub %*% gradient) sum_se <- sqrt(sum_variance) # CI using normal approximation (GLM) tibble( estimate = sum_estimate, std_error = sum_se, conf_low = estimate - qnorm(0.975) * std_error, conf_high = estimate + qnorm(0.975) * std_error, statistic = estimate / std_error, p_value = 2 * pnorm(-abs(statistic)) ) } else { stop("link must be either 'linear' or 'log'") }}```## Other portions of the analysis- [**Setup**](setup.qmd): Defines global paths, data sources, cohort inclusion criteria, and analysis-wide constants.- [**Tables**](tables.qmd): Summary tables and regression outputs generated from the final models.- [**Figures**](figures.qmd):Visualizations of costs, risks, and model-based estimates.- [**About**](about.qmd): methods, assumptions, and disclosures