---
title: "EPOC main analysis"
author: "Ian Harris and Helen Badge"
date: '`r paste(format(Sys.Date(), "%d %b %Y"))`'
output:
#  pdf_document:
#    toc: yes
  html_document:
    toc: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(tidy.opts=list(width.cutoff=80),tidy=TRUE)
```
## About
### Background
This is the main analysis for EPOC: Evidence-based Processes and Outcomes of Care, a multi-site prospective cohort study recruiting patients from high-volume private and public arthroplasty centres in Australia. All participants were adults undergoing elective hip or knee arthroplasty for osteoarthritis. The study was designed to generate evidence on the association between adverse events and guideline compliance (for antibiotic use and venous thromboembolism [VTE] prophylaxis) in an elective arthroplasty population.
  
### Aim
The primary research question is:  
  *Is there a difference in the rate of major adverse events after elective joint replacement surgery between those who are compliant with both antibiotic and VTE guidelines compared to those who are not?*  
  
The secondary research questions are:  
  *Is there a difference in the post-operative infection rate after arthroplasty between those who are compliant with antibiotic guidelines and those who are not?* and  
  *Is there a difference in the VTE rate (DVT and PE) after arthroplasty between those who are compliant with VTE prophylaxis guidelines and those who are not?*  

#### Analysis 1: composite compliance vs composite outcome 
*Predictor*: binary composite compliance (variable: non_compliance_ab1vte1)  
*Outcome*: binary composite outcome (variable: composite_outcome1_365)
  
#### Analysis 2: VTE compliance vs VTE outcome
*Predictor*: VTE NHMRC compliance (variable: non_compliance_nhmrc_vte1)  
*Outcome*: binary symptomatic VTE (variable: cc_vte_yesno)
  
#### Analysis 3: antibiotic compliance vs infection outcomes (all joint infections and only deep infections)
*Predictor*: Therapeutic Guidelines (Aust) antibiotic compliance (variable: non_compliance_tg_ab1)  
*Outcome*: all joint infections (variable: cc_type_jtinfect)  
*Outcome*: deep infection only (variable: deep_jtinfect)
  
#### Load packages
```{r load packages}
library(ggplot2)
library(dplyr)
library(tidyverse)
library(caTools)
library(StepReg)
library(finalfit)
library(tableone)
library(caret)
library(MASS)
library(DataExplorer)
#install.packages("MAMI", repos=c("http://R-Forge.R-project.org", "http://cran.at.r-project.org"), dependencies=TRUE)
library(MAMI)
library(chron)
```
#### Load data
```{r load data}
data <- read.csv("epoc_with_fu_data_R_LATEST.csv", na.strings = c("", "999", "NA", "N/A", "NR"))
mydata <- as.data.frame(data)

# ImprovingServicesAnd_DATA_2020-11-01_1323
# Recode id_number= 886 due to error in data 
  # TG_Ab_1_compliant_missing_data: recode to FALSE
  # TG_Ab_1_compliant: recode to TRUE because  ab_ceph = 1 AND ab_vanc = 1 AND others not missing 
mydata <- mydata %>%
  mutate(TG_Ab_1_compliant_missing_data = ifelse(id_number == 886, FALSE, TG_Ab_1_compliant_missing_data),
         TG_Ab_1_compliant = ifelse(id_number == 886, TRUE, TG_Ab_1_compliant))

# remove ineligible and those with no follow up
mydata <- mydata %>% 
  filter((optout_fu > 0) & (eligibility == 1))
#glimpse(mydata)
```
## Step 1: Create and modify variables
### Create outcome variables
```{r create outcomes}
# Calculate composite outcome - missing data allowed if N=1 for another individual complication
mydata <- mydata %>% mutate(composite_outcome1_365 = case_when(
     ((!is.na(cc_death_yesno) & cc_death_yesno == 1) | 
      (!is.na(cc_vte_yesno) & cc_vte_yesno == 1) |
      (!is.na(cc_readmiss_yesno) & cc_readmiss_yesno == 1) |
      (!is.na(cc_reop_yesno) & cc_reop_yesno == 1)) ~ 1L,
     ((!is.na(cc_death_yesno) & cc_death_yesno == 0) &
      (!is.na(cc_vte_yesno) & cc_vte_yesno == 0) & 
      (!is.na(cc_readmiss_yesno) & cc_readmiss_yesno == 0) &
      (!is.na(cc_reop_yesno) & cc_reop_yesno == 0)) ~ 0L,
     TRUE ~ NA_integer_))
table(mydata$composite_outcome1_365, exclude = NULL, dnn  = "composite_outcome1_365")

# VTE outcome
table(mydata$cc_vte_yesno, exclude = NULL, dnn = "cc_vte_yesno")

# infection outcome
table(mydata$cc_type_jtinfect, exclude = NULL, dnn = "cc_type_jtinfect")

# Calculate deep infection
mydata <- mydata %>% mutate(deep_jtinfect = if_else(
 is.na(cc_type_jtinfect_type), 0L, if_else(
   cc_type_jtinfect_type > 0, 1L, 0L)))
table(mydata$deep_jtinfect, exclude = NULL, dnn = "deep_jtinfect")
```
### Create main predictor variables (compliance)
Note: the main analysis for calculating compliance variables from the raw data (REDCap export) is in a separate file: "EPOC compliance report"
```{r create predictors}
mydata <- mydata %>% mutate(nhmrc_vte_compl_3a4a = case_when(
     (is.na(VTE_1_compliant) | is.na(VTE_2_compliant) | 
      is.na(VTE_3a_compliant) | is.na(VTE_4a_compliant)) ~ NA_integer_,
       (VTE_1_compliant == 1 & VTE_2_compliant == 1 & VTE_3a_compliant == 1 & VTE_4a_compliant == 1) ~ 1L,
       (VTE_1_compliant == 0 | VTE_2_compliant == 0 | VTE_3a_compliant == 0 | VTE_4a_compliant == 0) ~ 0L,
     TRUE ~ NA_integer_))

# Calculate overall compliance
mydata <- mydata %>% mutate(overall_compliance_ab1vte1 = case_when(
     (is.na(TG_Ab_overall_3a_compliant) | is.na(nhmrc_vte_compl_3a4a)) ~ NA_integer_,
       (TG_Ab_overall_3a_compliant == 1 & nhmrc_vte_compl_3a4a == 1) ~ 1L,
     TRUE ~ 0L))

# Define overall non_compliance (the reverse of compliance)
mydata <- mydata %>% mutate(non_compliance_ab1vte1 = 1L - overall_compliance_ab1vte1)

# Create non-compliance for VTE and antibiotic
mydata <- mydata %>% mutate(non_compliance_nhmrc_vte1 = 1L - nhmrc_vte_compl_3a4a)
mydata <- mydata %>% mutate(non_compliance_tg_ab1 = 1L - TG_Ab_overall_3a_compliant)

# Shorten variable names to make table columns narrower
mydata$arth_meds_antidep <- mydata$arth_meds_antiep_antidepress

```
### Create/format other predictor variables
```{r format predictors}
# convert "9" to NA for variable "asa" and dichotomise into new variable "ASA" with two levels:
# "1,2" and "3,4"
mydata <- mydata %>%
  mutate(ASA = if_else(
    asa %in% c(1L,2L), "1,2", if_else(
      asa %in% c(3L,4L), "3,4", NA_character_
    )
  ))
table(mydata$ASA, exclude = NULL, dnn = "ASA grade dichotomised")

# convert edu into dichotomous variable
# 0, None | 1, Year 8 or below | 2, Year 10 or below | 3, Completed school | 4, TAFE | 5, University | 999, Not stated/Unknown
mydata <- mydata %>%
  mutate(edu = if_else(
    edu < 4, 0L, if_else(
      edu > 3, 1L, NA_integer_
    )
  ))
table(mydata$edu, exclude = NULL, dnn = "Education dichotomised")

# convert "surg_duration" from factor to numeric
mydata$surg_duration <- as.numeric(mydata$surg_duration)

# create new variable "bilat" (bilateral, yes/no) from variable "side"
table(mydata$side, exclude = NULL, dnn = "Operative side: L, R, bilateral")
mydata<- mydata %>%
  mutate(bilat = if_else(
    side == 2, 1L, if_else(
      side < 2, 0L, NA_integer_)))
table(mydata$bilat, exclude = NULL, dnn = "Bilateral")

# dichotomise smoking status
# Smoker:  0, Never smoked | 1, Ex smoker | 2, Current smoker | 999, Unkown/Not stated
mydata <- mydata %>%
  mutate(smoker = (if_else(
    smoker == 2, "Yes", if_else(
    smoker == 999, NA_character_, "No"
    ))))
table(mydata$smoker, dnn = "Current smoker", exclude=NULL)

# Rename arthritis meds
mydata$arth_meds_none <- mydata$arth_meds_spec___0
mydata$arth_meds_non_nsaids <- mydata$arth_meds_spec___1
mydata$arth_meds_nsaids <- mydata$arth_meds_spec___2
mydata$arth_meds_steroids <- mydata$arth_meds_spec___3
mydata$arth_meds_antidep <- mydata$arth_meds_spec___4
mydata$arth_meds_Opioids <- mydata$arth_meds_spec___5

#Create neurax
mydata <- mydata %>%
  mutate(neurax = if_else(
    anaes_2 %in% c(0,3), 0L, if_else(
      anaes_2 %in% c(1,2,4,5,6,7,8,9), 1L, NA_integer_
    )))

#### recode anaes_2 variable response 8 (other) for six participants who did NOT have neuraxial anaesthesia
mydata <- mydata %>%
  mutate(neurax = if_else(id_number %in% c(734, 1108, 1608, 1621, 1625, 1637), 0L, neurax))

table(mydata$neurax, dnn = "Neuraxial anaesthesia", exclude = NULL)

#Calculate how many prophylactic drugs received
mydata <- mydata %>%
  mutate(count_vte = (vte_clex + vte_frag + vte_riva + vte_dabig + vte_fondap + vte_warf + vte_hep + vte_unfhep + vte_aspir + vte_alt1 + vte_alt2))
table(mydata$count_vte, dnn = "Number of prophylactic VTE medications received")

mydata <- mydata %>%
  mutate(count_ab = (ab_ceph + ab_fluclox + ab_vanc + ab_genta + ab_clinda + ab_astre + ab_fluro + ab_alt1 + ab_alt2))
table(mydata$count_ab, dnn = "Number of prophylactic antibiotics received")

# Categorise variables as needed
catvars <- c("non_compliance_ab1vte1", 
             "non_compliance_tg_ab1", 
             "non_compliance_nhmrc_vte1", 
             "VTE_1_compliant", 
             "VTE_2_compliant", 
             "VTE_3a_compliant", 
             "VTE_4a_compliant", 
             "TG_Ab_1_compliant", 
             "TG_Ab_2_compliant", 
             "TG_Ab_3a_compliant", 
             "TG_Ab_4_compliant", 
             "TG_Ab_overall_3a_compliant", 
             "composite_outcome1_365", 
             "cc_death_yesno", 
             "cc_vte_yesno", 
             "cc_readmiss_yesno", 
             "cc_reop_yesno", 
             "cc_type_jtinfect", 
             "deep_jtinfect", 
             "joint",
             "bilat", 
             "ASA", 
             "hosp_name", 
             "hosp_pub_priv", 
             "pers_sex", 
             "edu", 
             "smoker", 
             "cc_hd", 
             "cc_diab", 
             "cc_bleed", 
             "cc_stroke", 
             "cc_hbp", 
             "cc_cholesterol", 
             "cc_kd", 
             "cc_livd", 
             "cc_cancer", 
             "ph_cancer", 
             "cc_lud", 
           #  "cc_mentalh", 
             "cc_dep",
             "cc_gord", 
             "cc_sleep_apnoea", 
             "cc_osa_rx", 
             "cc_nc",  
             "cc_oth", 
             "cc_msk", 
             "arth_meds", 
             "arth_meds_none", 
             "arth_meds_non_nsaids", 
             "arth_meds_nsaids", 
             "arth_meds_steroids", 
             "arth_meds_antidep", 
             "arth_meds_Opioids", 
             "hr_prev", 
             "kr_prev", 
             "cement_fix", 
             "bld_transf", 
             "txa",
             "vte_prev_vte", 
             "vte_dopp_pt",
             "drain",
             "tourn",
           "neurax",
             "vte_1stmobil_day1"
                          )
mydata[,catvars] <- lapply(mydata[,catvars], factor)

# Combine hospitals 17 and 18 as they are from one surgical department and health servcie (cross-campus)
levels(mydata$hosp_name) <- c(1:17, 17, 19)
```
## Step 2: Variable identification and visualisation
Identify and label predictors and outcomes.  
Define data type for each variable.  
Visualise and summarise each relevant variable.  
Transform if necessary.  
### Summarise and visualise variables
#### Plot continuous variables
```{r plot cont variables}
hist(mydata$age)
boxplot(mydata$age, vertical = TRUE)
summary(mydata$age)

hist(mydata$bmi)
boxplot(mydata$bmi, vertical = TRUE)
summary(mydata$bmi)

hist(mydata$los)
boxplot(mydata$los, vertical = TRUE)
summary(mydata$los)

hist(mydata$surg_duration)
boxplot(mydata$surg_duration, vertical = TRUE)
summary(mydata$surg_duration)

#log transform variable"los" to "loglos"
mydata$loglos <- log(mydata$los)
hist(mydata$loglos)
boxplot(mydata$loglos, vertical = TRUE)
summary(mydata$loglos)

tab <- CreateTableOne(data = mydata, vars = c("age", "bmi", "surg_duration", "loglos"))
print(tab, showAllLevels = TRUE)
summary(tab, pDigits = 2)
```
#### Use tableone to display variables
```{r Step 2}
myvars <- c("non_compliance_ab1vte1", 
            "non_compliance_tg_ab1", 
            "non_compliance_nhmrc_vte1", 
            "VTE_1_compliant", 
            "VTE_2_compliant", 
            "VTE_3a_compliant", 
            "VTE_4a_compliant", 
            "TG_Ab_1_compliant", 
            "TG_Ab_2_compliant",
            "TG_Ab_3a_compliant", 
            "TG_Ab_4_compliant", 
#            "TG_Ab_overall_3a_compliant", #collinearity with elements of compliance
            "composite_outcome1_365", 
            "cc_death_yesno", 
            "cc_vte_yesno", 
            "cc_readmiss_yesno", 
            "cc_reop_yesno", 
            "cc_type_jtinfect", 
            "deep_jtinfect", 
            "joint", 
            "bilat", 
            "hosp_name", 
            "hosp_pub_priv", 
            "pers_sex", 
            "edu", 
            "smoker", 
            "cc_hd", 
            "cc_diab", 
            "cc_bleed", 
            "cc_stroke", 
            "cc_hbp", 
            "cc_cholesterol", 
            "cc_kd", 
            "cc_livd", 
            "cc_cancer",
            "ph_cancer", 
            "cc_lud", 
           #  "cc_mentalh", #low event rate, depression or anxiety more relevant
             "cc_dep",
            "cc_gord", 
            "cc_sleep_apnoea", 
            "cc_nc", 
            "cc_oth", 
            "cc_msk", 
            "arth_meds", 
            "arth_meds_non_nsaids", 
            "arth_meds_nsaids", 
            "arth_meds_steroids", 
            "arth_meds_antidep", 
            "arth_meds_Opioids", 
            "hr_prev", 
            "kr_prev", 
            "ASA", 
            "cement_fix", 
            "bld_transf", 
            "txa", 
            "age", 
            "surg_duration", 
    #        "loglos",  #LOS considered a process outcome rather than covariate 
            "bmi", 
            "vte_prev_vte", 
            "vte_dopp_pt",
             "drain",
             "tourn",
            "neurax",
             "vte_1stmobil_day1")
tab1 <- CreateTableOne(data = mydata, vars = myvars)
print(tab1, showAllLevels = TRUE, pDigits = 2)
summary(tab1, pDigits = 2)
```
## Step 3: Bivariable (unadjusted) analyses
### Step 3a: Bivariable analysis for composite outcome
```{r bivariable analyses}

a <- xtabs(~ composite_outcome1_365 + non_compliance_ab1vte1, data = mydata)
addmargins(a)
summary(a) 

tab2 <- CreateTableOne(data = mydata, vars = myvars, strata = "composite_outcome1_365")
print(tab2, showAllLevels = TRUE, pDigits = 2)
summary(tab2, pDigits = 2)

bivar_screen <- tibble(varname = character(0), Df=integer(0), Deviance=numeric(0), "Resid. Df" = integer(0), "Resid.Dev"=numeric(0), "Pr(>Chi)" = numeric(0))

screenvar <- function(var) {
  f <- as.formula(paste("composite_outcome1_365", var, sep = " ~ "))
  a <- as.tibble(anova(glm(f, family=binomial(), data=mydata), test="Chisq"))[2,]
  a$varname <- var
  return(rbind(bivar_screen, a))
}
covariates <- myvars                                  
for(v in covariates) {
  bivar_screen <- screenvar(v)
}

bivar_screen %>% arrange(`Pr(>Chi)`)  %>% knitr::kable()

# finalfit Table 1
explanatory = covariates
dependent = "composite_outcome1_365"
mydata %>%
  summary_factorlist(dependent, explanatory,
  p=TRUE, add_dependent_label=FALSE)
```
### Step 3b: Bivariable analysis for VTE outcome
```{r bivariable analyses for VTE}
a <- xtabs(~ mydata$cc_vte_yesno + mydata$non_compliance_nhmrc_vte1, data = mydata)
addmargins(a)
summary(a)

tab3 <- CreateTableOne(data = mydata, vars = myvars, strata = "cc_vte_yesno")
print(tab3, showAllLevels = TRUE, pDigits = 2)
#summary(tab3, pDigits = 2)

screenvar.vte <- function(var) {
  f <- as.formula(paste("cc_vte_yesno", var, sep = " ~ "))
  a <- as.tibble(anova(glm(f, family=binomial(), data=mydata), test="Chisq"))[2,]
  a$varname <- var
  return(rbind(bivar_screen, a))
}
for(v in covariates) {
  bivar_screen <- screenvar.vte(v)
}

bivar_screen %>% arrange(`Pr(>Chi)`)  %>% knitr::kable()

# finalfit Table 1
explanatory = covariates
dependent = "cc_vte_yesno"
mydata %>%
  summary_factorlist(dependent, explanatory,
  p=TRUE, add_dependent_label=FALSE)
```
### Step 3c: Bivariable analysis for infection outcomes (all and deep only)
#### All infection outcomes
```{r bivariable analyses for all infection}
a <- xtabs(~ mydata$cc_type_jtinfect + mydata$non_compliance_tg_ab1, data = mydata)
addmargins(a)
summary(a)

tab4 <- CreateTableOne(data = mydata, vars = myvars, strata = "cc_type_jtinfect")
print(tab4, showAllLevels = TRUE, pDigits = 2)
# summary(tab4, pDigits = 2)

screenvar.ab <- function(var) {
  f <- as.formula(paste("cc_type_jtinfect", var, sep = " ~ "))
  a <- as.tibble(anova(glm(f, family=binomial(), data=mydata), test="Chisq"))[2,]
  a$varname <- var
  return(rbind(bivar_screen, a))
}

for(v in covariates) {
  bivar_screen <- screenvar.ab(v)
}

bivar_screen %>% arrange(`Pr(>Chi)`)  %>% knitr::kable()

# finalfit Table 1
explanatory = covariates
dependent = "cc_type_jtinfect"
mydata %>%
  summary_factorlist(dependent, explanatory,
  p=TRUE, add_dependent_label=FALSE)
```
#### Deep infection only
```{r bivariable analyses for deep infection}
a <- xtabs(~ mydata$deep_jtinfect + mydata$non_compliance_tg_ab1, data = mydata)
addmargins(a)
summary(a)

tab5 <- CreateTableOne(data = mydata, vars = myvars, strata = "deep_jtinfect")
print(tab5, showAllLevels = TRUE, pDigits = 2)
#summary(tab5, pDigits = 2)

screenvar.deep <- function(var) {
  f <- as.formula(paste("deep_jtinfect", var, sep = " ~ "))
  a <- as.tibble(anova(glm(f, family=binomial(), data=mydata), test="Chisq"))[2,]
  a$varname <- var
  return(rbind(bivar_screen, a))
}
                                
for(v in covariates) {
  bivar_screen <- screenvar.deep(v)
}

bivar_screen %>% arrange(`Pr(>Chi)`)  %>% knitr::kable()

# finalfit Table 1
explanatory = covariates
dependent = "deep_jtinfect"
mydata %>%
  summary_factorlist(dependent, explanatory,
  p=TRUE, add_dependent_label=FALSE)
```
## Step 4: Missing data
Detect and deal with missing data, if necessary.
```{r cut data}
# shrink dataset to variables of interest
cutdata <- mydata%>%
  dplyr::select(non_compliance_ab1vte1, non_compliance_tg_ab1, non_compliance_nhmrc_vte1, composite_outcome1_365, cc_vte_yesno, cc_type_jtinfect, deep_jtinfect, joint, bilat, hosp_name, hosp_pub_priv, pers_sex, edu, smoker, cc_hd, cc_diab, cc_bleed, cc_stroke, cc_hbp, cc_cholesterol, cc_kd, cc_livd, cc_cancer, cc_dep, ph_cancer, cc_lud, cc_mentalh, cc_gord, cc_sleep_apnoea, cc_nc, cc_oth, cc_msk,  arth_meds, arth_meds_non_nsaids, arth_meds_nsaids, arth_meds_steroids, arth_meds_antidep, arth_meds_Opioids, hr_prev, kr_prev, ASA, cement_fix, bld_transf, txa, age, surg_duration, loglos, bmi, vte_dopp_pt, vte_prev_vte, neurax, drain, vte_1stmobil_day1)
plot_str(cutdata)

# identify missing data
# basic method
#p_miss <- function(x){sum(is.na(x)/length(x)*100)}
#apply(mydata,2,p_miss)

library(Amelia)
missmap(cutdata, main = "Missing values vs observed")

library(VIM)
aggr(cutdata,numbers=TRUE, sortVars=TRUE)

# impute missing data (multiple imputation by chained equations)
library(mice)
cutdata_imp <- mice(cutdata, m=5, maxit=50, meth='pmm', seed=500, printFlag=FALSE)
summary(cutdata_imp)
comp_data <- complete(cutdata_imp)

# generate dataset with missing data removed for sensitivity analysis using complete case analysis (cca)
no_NAdata <- na.omit(cutdata)
str(cutdata)
```
## Step 5: Outliers
Identify and deal with outliers, if necessary
```{r outliers}
# no outliers detected in continuous variables (above)
```
## Step 6: Modelling
### Step 6a: Analysis 1: composite compliance vs composite outcome
**Predictor**: binary composite compliance (non_compliance_ab1vte1)  
**Outcome**: binary composite outcome (composite_outcome1_365)  
**Variable selection**: test all possible confounders and effect modifiers and include those with p < 0.25 on bivariable analysis in first step of backwards stepwise regression.  
**Method**: backwards stepwise regression starting with: variables chosen for clinical reasons (age, joint, bilat); the main predictor variable (composite compliance, forced into the model); and any variable with p < 0.25 on bivariable analysis. Criteria: AIC  
```{r composite outcome modelling}
#tab_comp_check <- CreateTableOne(data = cephdata, vars = c(
   #"joint", "vte_dopp_pt", "cement_fix", "vte_1stmobil_day1", "drain", "bmi", "cc_kd", "hosp_name", "arth_meds_nsaids", "surg_duration", "ASA", "cc_hd", "cc_gord", "cc_msk", "cc_stroke", "cc_hbp", "bld_transf", "cc_sleep_apnoea", "arth_meds_antidep", "cc_cholesterol", "age", "arth_meds", "hr_prev", "cc_lud", "arth_meds_steroids", "non_compliance_ab1vte1", "ph_cancer"))
#summary(tab_comp_check , pDigits = 2)

# arth_meds_steroids excluded due to low event rates causing model instability
# main model using imputed data
#exclude hosp_name  
mainmodel1 <- glm(composite_outcome1_365 ~ non_compliance_ab1vte1 + joint + vte_dopp_pt + cement_fix + vte_1stmobil_day1 + drain + bmi + cc_kd + arth_meds_nsaids + surg_duration + ASA + cc_hd + cc_gord + cc_msk + cc_stroke + cc_hbp + bld_transf + cc_sleep_apnoea + arth_meds_antidep + cc_cholesterol + age + arth_meds + hr_prev + cc_lud + ph_cancer, data = comp_data, family = "binomial")
summary(mainmodel1)

# individual (non_compliance_tg_ab1 + non_compliance_nhmrc_vte1) compliance removed from the model due to colinearity
# backward stepwise regression using stepAIC
stepmain1 <- stepAIC(mainmodel1, 
                     k= 2, 
                     trace = 0, 
                     direction = "backward", 
                     scope = list(upper = mainmodel1, 
                                  lower = ~ non_compliance_ab1vte1))

summary(stepmain1)
exp(cbind(OR = coef(stepmain1), confint(stepmain1)))

# model averaging using MAMI
ma_main <- mami(cutdata_imp, 
           model="binomial", 
           outcome="composite_outcome1_365", 
           method="MS.criterion",
           criterion="AIC",
           var.remove=c(
            "cc_vte_yesno",
            "cc_type_jtinfect",
            "deep_jtinfect",
            "non_compliance_tg_ab1",
            "non_compliance_nhmrc_vte1",
            "hosp_name",
            "cc_mentalh",
            "arth_meds_steroids"))
summary(ma_main)
```
### Step 6b: Analysis 2: VTE guideline compliance vs VTE outcome
**Predictor**: VTE NHMRC compliance (non_compliance_nhmrc_vte1)  
**Outcome**: binary symptomatic VTE (cc_vte_yesno)  
**Method**: backwards stepwise regression starting with: compulsory variables (for clinical reasons): joint, bilat and age, the main predictor (VTE NHMRC compliance), and any variable with p < 0.25 on bivariable analysis. Criteria: AIC.  
```{r VTE modelling}

#check VTE: non_compliance_nhmrc_vte1,  cc_vte_yesno, vte_dopp_pt, joint, cement_fix, hosp_name, vte_1stmobil_day1, drain, non_compliance_tg_ab1, bmi, cc_kd, hosp_name, cc_hd, arth_meds_nsaids, age, cement_fix, surg_duration, ASA, cc_gord, cc_msk, smoker, bmi, cc_stroke, cc_hbp ,hr_prev, cc_hbp, bld_transf, cc_sleep_apnoea, arth_meds_antidep, vte_prev_vte, cc_cholesterol, edu,  hr_prev, cc_lud, cc_dep, arth_meds_steroids, cc_msk, ph_cancer 

#remove hosp_name + arth_meds_steroids excluded due to low event rates causing model instability
modelVTE1 <- glm(cc_vte_yesno ~ non_compliance_nhmrc_vte1 + vte_dopp_pt + joint + cement_fix + vte_1stmobil_day1 + drain + non_compliance_tg_ab1 + bmi + cc_kd + cc_hd + arth_meds_nsaids + age + cement_fix + surg_duration + ASA + cc_gord + cc_msk + smoker + bmi + cc_stroke + cc_hbp + hr_prev + bld_transf + cc_sleep_apnoea + arth_meds_antidep + vte_prev_vte + cc_cholesterol + edu + hr_prev + cc_lud + cc_dep + ph_cancer, data = comp_data, family = "binomial")
summary(modelVTE1)
stepVTE1 <- stepAIC(modelVTE1, trace = 0, direction = "backward")
summary(stepVTE1)
exp(cbind(OR = coef(stepVTE1), confint(stepVTE1)))

ma_vte <- mami(cutdata_imp, 
           model="binomial", 
           outcome="cc_vte_yesno", 
           method="MS.criterion",
           criterion="AIC",
           var.remove=c(
            "composite_outcome1_365",
            "cc_type_jtinfect",
            "deep_jtinfect",
            "non_compliance_tg_ab1",
            "non_compliance_ab1vte1",
            "hosp_name",
            "cc_mentalh",
            "arth_meds_steroids"))
summary(ma_vte)
```
### Step 6c: Analysis 3. Antibiotic guideline compliance vs infection outcomes
**Predictor**: Therapeutic Guidelines (Aust) antibiotic compliance (non_compliance_tg_ab1)  
**Outcome**: all joint infections (cc_type_jtinfect)  
**Outcome**: deep infection only (deep_jtinfect)  
**Method**: backwards stepwise regression starting with: compulsory variables (for clinical reasons): joint, bilat and age, the main predictor (all joint infections), and any variable with p < 0.25 on bivariable analysis. Criteria: AIC. 

#### All joint infections
```{r all joint infection modelling}
#check Age, arth_meds_antidep, arth_meds_nsaids, arth_meds_Opioids, arth_meds_steroids, ASA, bilat, bld_transf, bmi, cc_cancer, cc_cholesterol, cc_dep, cc_diab, cc_gord, cc_hbp, cc_hd, cc_kd, cc_lud, cc_msk, cc_nc, cc_sleep_apnoea, cc_stroke, cc_type_jtinfect, cement_fix, drain, edu, hosp_pub_priv, hr_prev, joint, neurax, non_compliance_nhmrc_vte1, non_compliance_tg_ab1, ph_cancer, smoker, surg_duration, vte_dopp_pt, vte_prev_vte

# arth_meds_steroids, cc_cancer removed due to small event rate and model instability
# hosp_name removed due to model instability (no events for Hospital 4)
modelAB1 <- glm(cc_type_jtinfect ~ non_compliance_tg_ab1 + non_compliance_nhmrc_vte1 + age + arth_meds_antidep + arth_meds_nsaids + arth_meds_Opioids + ASA + bilat + bld_transf + bmi + cc_cholesterol + cc_dep + cc_diab + cc_gord + cc_hbp + cc_hd + cc_kd + cc_lud + cc_msk + cc_nc + cc_sleep_apnoea + cc_stroke + cement_fix + drain + edu + hosp_pub_priv + hr_prev + joint + neurax + ph_cancer + smoker + surg_duration + vte_dopp_pt + vte_prev_vte, data = comp_data, family = "binomial")
summary(modelAB1)
stepAB1 <- stepAIC(modelAB1, trace = 0, direction = "backward")
summary(stepAB1)
exp(cbind(OR = coef(stepAB1), confint(stepAB1)))

# model averaging using MAMI
ma_ab1 <- mami(cutdata_imp, 
           model="binomial", 
           outcome="cc_type_jtinfect", 
           method="MS.criterion",
           criterion="AIC",
           var.remove=c(
            "composite_outcome1_365",
            "cc_vte_yesno",
            "deep_jtinfect",
            "non_compliance_ab1vte1",
            "hosp_name",
            "cc_mentalh",
            "arth_meds_steroids"))
summary(ma_ab1)
```
#### Deep joint infection only
```{r deep infection modelling}
#check age, arth_meds_antidep, arth_meds_non_nsaids, arth_meds_Opioids, arth_meds_steroids, ASA, bilat, , bld_transf, bmi, cc_cancer, cc_cholesterol, cc_dep, cc_dep, cc_diab, cc_gord, cc_hbp, cc_hd, cc_kd, cc_lud, cc_msk, cc_nc, cc_sleep_apnoea, cc_stroke, cement_fix, deep_jtinfect, drain, edu, hosp_pub_priv, hr_prev, joint, neurax, non_compliance_nhmrc_vte1, non_compliance_tg_ab1, ph_cancer, smoker, surg_duration, vte_dopp_pt, vte_prev_vte

#  arth_meds_steroids and cc_cancer removed due to low event rate and model instability
modelAB2 <- glm(deep_jtinfect ~ non_compliance_tg_ab1 + non_compliance_nhmrc_vte1 + age + arth_meds_antidep + arth_meds_non_nsaids + arth_meds_Opioids + ASA + bilat + bld_transf + bmi + cc_cholesterol + cc_dep + cc_diab + cc_gord + cc_hbp + cc_hd + cc_kd + cc_lud + cc_msk + cc_nc + cc_sleep_apnoea + cc_stroke + cement_fix + drain + edu + hosp_pub_priv + hr_prev + joint + neurax + ph_cancer + smoker + surg_duration + vte_dopp_pt + vte_prev_vte, data = comp_data, family = "binomial")
summary(modelAB2)

#exp(cbind(OR = coef(modelAB2), confint(modelAB2)))
stepAB2 <- stepAIC(modelAB2, trace = 0, direction = "backward", scope = list(upper = modelAB2,      lower = ~ non_compliance_tg_ab1))
summary(stepAB2)
exp(cbind(OR = coef(stepAB2), confint(stepAB2)))

# model averaging using MAMI - check once final model ran 01122020
ma_ab2 <- mami(cutdata_imp, 
           model="binomial", 
           outcome="deep_jtinfect", 
           method="MS.criterion",
           criterion="AIC",
           var.remove=c(
            "composite_outcome1_365",
            "cc_vte_yesno",
            "cc_type_jtinfect",
            "non_compliance_ab1vte1",
            "hosp_name",
            "cc_mentalh",
            "arth_meds_steroids"))
summary(ma_ab2)
```
## Step 7: Model diagnostics
### Step 7a. Analysis 1: composite compliance vs composite outcome
#### Check assumptions for regression
```{r main model assumptions}
library(broom)
# Predict the probability (p) of outcome
pmain <- predict(stepmain1, type = "response")
predicted_main <- ifelse(pmain > 0.5, "pos", "neg")
head(predicted_main)
plot(pmain) # one outlier noted
# Assumption 1: linearity of indep. variables and log odds
# Select numeric predictors
diag_data <- comp_data %>%
  dplyr::select_if(is.numeric) 
predictors <- colnames(diag_data)
# Bind the logit and tidying the data for plot
diag_data_main <- diag_data %>%
  mutate(logit = log(pmain/(1-pmain))) %>%
  gather(key = "predictors", value = "predictor.value", -logit)
ggplot(diag_data_main, aes(logit, predictor.value))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")
# Assumption 2: no influential observations
plot(stepmain1, which = 4, id.n = 3)
model.data <- augment(stepmain1) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd)
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = composite_outcome1_365), alpha = .5) +
  theme_bw()
# Assumption 3: no collinearity between predictors
car::vif(stepmain1) # VIF up to 15% - NOT seen when main predictor is not forced into the model
# Assumption 4: independence of observations
# assumed due to nature of study (separate patients) but possible role of grouping at level of hospital (not significant in model) **Tim: maybe a note about multi-level modelling**
```

#### Check model fit
```{r main model fit}
# calculate the AUC
library(pROC)
roc <- roc(stepmain1$y ~ pmain)
auc(roc)
plot(roc)
# calculate H-L goodness of fit
library(ResourceSelection)
hoslem.test(stepmain1$y, fitted(stepmain1))
# calculate pseudo-R squared
library(DescTools)
PseudoR2(stepmain1, which = c("McFadden", "Nagelkerke"))
```

### Step 7b. Analysis 2: VTE guideline compliance vs VTE outcome
#### Check assumptions for regression
```{r VTE model assumptions}
# Predict the probability (p) of outcome
pVTE <- predict(stepVTE1, type = "response")
predicted_VTE <- ifelse(pVTE > 0.5, "pos", "neg")
head(predicted_VTE)
# 1. check linearity
# Bind the logit and tidy the data for plotting
diag_dataVTE <- diag_data %>%
  mutate(logitVTE = log(pVTE/(1-pVTE))) %>%
  gather(key = "predictors", value = "predictor.value", -logitVTE)
ggplot(diag_dataVTE, aes(logitVTE, predictor.value))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")
# 2. look for influential points
plot(stepVTE1, which = 4, id.n = 3)
model_data_VTE <- augment(stepVTE1) %>% 
  mutate(index = 1:n())
model_data_VTE %>% top_n(3, .cooksd)
ggplot(model_data_VTE, aes(index, .std.resid)) + 
  geom_point(aes(color = cc_vte_yesno), alpha = .5) +
  theme_bw()
# 3. collinearity
car::vif(stepVTE1) # VIF for age, cc_hd > 10%
```

#### Check model fit
```{r VTE model fit}
# calculate the AUC
rocVTE <- roc(stepVTE1$y ~ pmain)
auc(rocVTE)
plot(rocVTE)
# calculate H-L goodness of fit
hoslem.test(stepVTE1$y, fitted(stepVTE1))
# calculate pseudo-R squared
PseudoR2(stepVTE1, which = c("McFadden", "Nagelkerke"))
```

### Step 7c. Analysis 3: antibiotic guideline compliance vs infection outcomes
#### Check assumptions for regression (all joint infections)
```{r all infection model assumptions}
# Predict the probability (p) of outcome
pAB1 <- predict(stepAB1, type = "response")
predicted_AB1 <- ifelse(pAB1 > 0.5, "pos", "neg")
head(predicted_AB1)
# 1. check linearity
# Bind the logit and tidy the data for plotting
diag_dataAB1 <- diag_data %>%
  mutate(logitAB1 = log(pAB1/(1-pAB1))) %>%
  gather(key = "predictors", value = "predictor.value", -logitAB1)
ggplot(diag_dataAB1, aes(logitAB1, predictor.value))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")
# 2. look for influential points
plot(stepAB1, which = 4, id.n = 3)
model_data_AB1 <- augment(stepAB1) %>% 
  mutate(index = 1:n())
model_data_AB1 %>% top_n(3, .cooksd)
ggplot(model_data_AB1, aes(index, .std.resid)) + 
  geom_point(aes(color = cc_type_jtinfect), alpha = .5) +
  theme_bw()
# 3. collinearity
car::vif(stepAB1) # VIF for hosp_pub_priv, bmi, bld_transf > 10%
```

#### Check assumptions for regression (deep joint infections only)
```{r deep infection model assumptions}
# Predict the probability (p) of outcome
pAB2 <- predict(stepAB2, type = "response")
predicted_AB2 <- ifelse(pAB2 > 0.5, "pos", "neg")
head(predicted_AB2)
# 1. check linearity
# Bind the logit and tidy the data for plotting
diag_dataAB2 <- diag_data %>%
  mutate(logitAB2 = log(pAB2/(1-pAB2))) %>%
  gather(key = "predictors", value = "predictor.value", -logitAB2)
ggplot(diag_dataAB2, aes(logitAB2, predictor.value))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")
# 2. look for influential points
plot(stepAB2, which = 4, id.n = 3)
model_data_AB2 <- augment(stepAB2) %>% 
  mutate(index = 1:n())
model_data_AB2 %>% top_n(3, .cooksd)
ggplot(model_data_AB2, aes(index, .std.resid)) + 
  geom_point(aes(color = deep_jtinfect), alpha = .5) +
  theme_bw()
# 3. collinearity
car::vif(stepAB2) # VIF for cc_sleep_apnoea, bmi > 10%
```

#### Check model fit (all joint infections)
```{r all infection model fit}
# calculate the AUC
rocAB1 <- roc(stepAB1$y ~ pmain)
auc(rocAB1)
plot(rocAB1)
# calculate H-L goodness of fit
hoslem.test(stepAB1$y, fitted(stepAB1))
# calculate pseudo-R squared
PseudoR2(stepAB1, which = c("McFadden", "Nagelkerke"))
```

#### Check model fit (deep joint infections only)
```{r deep infection model fit}
# calculate the AUC
rocAB2 <- roc(stepAB2$y ~ pmain)
auc(rocAB2)
plot(rocAB2)
# calculate H-L goodness of fit
hoslem.test(stepAB2$y, fitted(stepAB2))
# calculate pseudo-R squared
PseudoR2(stepAB2, which = c("McFadden", "Nagelkerke"))
```

## Step 8. Sensitivity analyses
### Step 8a. Analysis 1: composite compliance vs composite outcome
```{r sensitivity main}
# run model with missing data removed (complete case analysis, no imputation)
mainmodel2 <- glm(composite_outcome1_365 ~ non_compliance_ab1vte1 + joint + vte_dopp_pt + cement_fix + vte_1stmobil_day1 + drain + bmi + cc_kd + arth_meds_nsaids + surg_duration + ASA + cc_hd + cc_gord + cc_msk + cc_stroke + cc_hbp + bld_transf + cc_sleep_apnoea + arth_meds_antidep + cc_cholesterol + age + arth_meds + hr_prev + cc_lud + ph_cancer, data = no_NAdata, family = "binomial")
stepmain2 <- stepAIC(mainmodel2, trace = 0, direction = "backward")
summary(stepmain2)
exp(stepmain2$coefficients)
exp(confint(stepmain2))
# removing missing data results in same final model

# remove single outlier in outcome (high logodds) causing loss of linearity in model diagnostics
comp_data_drop1600 <- comp_data[-1600, ]
mainmodeldrop <- glm(composite_outcome1_365 ~ loglos + joint + cement_fix + hosp_name + bmi + cc_kd + cc_hd + arth_meds_nsaids + age + surg_duration + ASA + cc_gord + cc_msk + cc_stroke + cc_hbp + hr_prev + bld_transf + cc_sleep_apnoea + arth_meds_antidep + smoker + cc_cholesterol + cc_nc + cc_lud + ph_cancer + non_compliance_ab1vte1, data = comp_data_drop1600, family = "binomial")
stepmaindrop <- stepAIC(mainmodeldrop, trace = 0, direction = "backward")
summary(stepmaindrop)
exp(stepmaindrop$coefficients)
exp(confint(stepmaindrop))
# Predict the probability (p) of outcome
pmain <- predict(stepmaindrop, type = "response")
predicted_main <- ifelse(pmain > 0.5, "pos", "neg")
head(predicted_main)
plot(pmain) # one outlier noted
# 1. check linearity
# Select numeric predictors
diag_data <- comp_data_drop1600 %>%
  dplyr::select_if(is.numeric) 
predictors <- colnames(diag_data)
# Bind the logit and tidying the data for plot
diag_data <- diag_data %>%
  mutate(logit = log(pmain/(1-pmain))) %>%
  gather(key = "predictors", value = "predictor.value", -logit)
ggplot(diag_data, aes(logit, predictor.value))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")
# linearity improved afer removal of 1600
# 2. look for influential points
plot(stepmaindrop, which = 4, id.n = 3)
model.data <- augment(stepmaindrop) %>% 
  mutate(index = 1:n())
model.data %>% top_n(3, .cooksd)
ggplot(model.data, aes(index, .std.resid)) + 
  geom_point(aes(color = composite_outcome1_365), alpha = .5) +
  theme_bw()
# 3. collinearity
car::vif(stepmaindrop) # all VIF levels low
```

### Step 8b. Analysis 2: VTE guideline compliance vs VTE outcome
```{r sensitivity VTE}
# same modelling process using data with NA removed (complete case analysis) for comparison
no_NAdata <- na.omit(cutdata)
modelVTE2 <- glm(cc_vte_yesno ~ non_compliance_nhmrc_vte1 + vte_dopp_pt + joint + cement_fix + vte_1stmobil_day1 + drain + non_compliance_tg_ab1 + bmi + cc_kd + cc_hd + arth_meds_nsaids + age + cement_fix + surg_duration + ASA + cc_gord + cc_msk + smoker + bmi + cc_stroke + cc_hbp + hr_prev + cc_hbp + bld_transf + cc_sleep_apnoea + arth_meds_antidep + vte_prev_vte + cc_cholesterol + edu + hr_prev + cc_lud + cc_dep + cc_msk + ph_cancer, data = no_NAdata, family = "binomial")
stepVTE2 <- stepAIC(modelVTE2, trace = 0, direction = "backward")
summary(stepVTE2)
exp(cbind(OR = coef(stepVTE2), confint(stepVTE2)))
# results in 3% difference in OR for VTE compliance (2.65 vs 2.74)

```

### Step 8c. Analysis 3: antibiotic compliance vs infection outcomes
#### All joint infections
```{r sensitivity all infection}
# re-run analysis with NA removed ( + non_compliance_tg_ab1 + non_compliance_nhmrc_vte1)
modelAB3 <- glm(cc_type_jtinfect ~ non_compliance_tg_ab1 + non_compliance_nhmrc_vte1 + age + arth_meds_antidep + arth_meds_nsaids + arth_meds_Opioids + ASA + bilat + bld_transf + bmi + cc_cholesterol + cc_dep + cc_diab + cc_gord + cc_hbp + cc_hd + cc_kd + cc_lud + cc_msk + cc_nc + cc_sleep_apnoea + cc_stroke + cement_fix + drain + edu + hosp_pub_priv + hr_prev + joint + neurax + ph_cancer + smoker + surg_duration + vte_dopp_pt + vte_prev_vte, data = no_NAdata, family = "binomial")
summary(modelAB3)
stepAB3 <- stepAIC(modelAB3, trace = 0, direction = "backward")
summary(stepAB3)
exp(cbind(OR = coef(stepAB3), confint(stepAB3)))
# results in 8% difference in OR for antibiotic compliance (1.81 v 1.97) but remains significant
```

#### deep joint infections only
```{r sensitivity deep infection}
# re-run analysis with NA removed (not imputed) for comparison
modelAB4 <- glm(deep_jtinfect ~ non_compliance_tg_ab1 + non_compliance_nhmrc_vte1 + age + arth_meds_antidep + arth_meds_non_nsaids + arth_meds_Opioids + ASA + bilat + bld_transf + bmi + cc_cholesterol + cc_dep + cc_diab + cc_gord + cc_hbp + cc_hd + cc_kd + cc_lud + cc_msk + cc_nc + cc_sleep_apnoea + cc_stroke + cement_fix + drain + edu + hosp_pub_priv + hr_prev + joint + neurax + ph_cancer + smoker + surg_duration + vte_dopp_pt + vte_prev_vte, data = no_NAdata, family = "binomial")
summary(modelAB4)
stepAB4 <- stepAIC(modelAB4, trace = 0, direction = "backward")
summary(stepAB4)
exp(cbind(OR = coef(stepAB4), confint(stepAB4)))
# results in similar model (cc_sleep_apnoea in, hosp_pub_priv out)

```

## Step 9. Alternative analyses
### Step 9a. BIC criteria
```{r}
# run main model using BIC criteria instead of AIC
stepmainBIC <- stepAIC(mainmodel1, k= log(1875), trace = 0, direction = "backward")
summary(stepmainBIC)
exp(cbind(OR = coef(stepmainBIC), confint(stepmainBIC)))

# VTE model using BIC criteria
stepVTEBIC <- stepAIC(modelVTE1, k = log(1875), trace = 0, direction = "backward")
summary(stepVTEBIC)
exp(cbind(OR = coef(stepVTEBIC), confint(stepVTEBIC)))

# all infection model using BIC criteria
stepAB1BIC <- stepAIC(modelAB1, k = log(1875), trace = 0, direction = "backward")
summary(stepAB1BIC)
exp(cbind(OR = coef(stepAB1BIC), confint(stepAB1BIC)))

# deep infection model using BIC criteria
stepAB2BIC <- stepAIC(modelAB2, k = log(1875), trace = 0, direction = "backward")
summary(stepAB2BIC)
exp(cbind(OR = coef(stepAB2BIC), confint(stepAB2BIC)))
```
### Step 9b. Exclude routine doppler

```{r sensitivity analyses without DUS}
## Rerun Composite analyses excluding routine doppler for comparison
mainmodel1_rd <- glm(composite_outcome1_365 ~ non_compliance_ab1vte1 + joint + cement_fix + vte_1stmobil_day1 + drain + bmi + cc_kd + arth_meds_nsaids + surg_duration + ASA + cc_hd + cc_gord + cc_msk + cc_stroke + cc_hbp + bld_transf + cc_sleep_apnoea + arth_meds_antidep + cc_cholesterol + age + arth_meds + hr_prev + cc_lud + ph_cancer, data = comp_data, family = "binomial")
summary(mainmodel1_rd)

stepmain1_rd <- stepAIC(mainmodel1_rd, 
                     k= 2, 
                     trace = 0, 
                     direction = "backward", 
                     scope = list(upper = mainmodel1_rd, 
                                  lower = ~ non_compliance_ab1vte1))

summary(stepmain1_rd)
exp(cbind(OR = coef(stepmain1_rd), confint(stepmain1_rd)))

## Rerun VTE analyses excluding routine doppler for comparison
modelVTE1_rd <- glm(cc_vte_yesno ~ non_compliance_nhmrc_vte1 + joint + cement_fix + vte_1stmobil_day1 + drain + non_compliance_tg_ab1 + bmi + cc_kd + cc_hd + arth_meds_nsaids + age + cement_fix + surg_duration + ASA + cc_gord + cc_msk + smoker + bmi + cc_stroke + cc_hbp + hr_prev + cc_hbp + bld_transf + cc_sleep_apnoea + arth_meds_antidep + vte_prev_vte + cc_cholesterol + edu + hr_prev + cc_lud + cc_dep + cc_msk + ph_cancer, data = comp_data, family = "binomial")
summary(modelVTE1_rd)
stepVTE1_rd <- stepAIC(modelVTE1_rd, trace = 0, direction = "backward")
summary(stepVTE1_rd)
exp(cbind(OR = coef(stepVTE1_rd), confint(stepVTE1_rd)))

## Routine dopper ultrasound not retained in final models for all infection outcomes or deep infection outcomes analyses
```

### Step 9C. Inclusion interaction terms for all covariates in each model 

```{r alternative analyses including interaction terms}

## Analysis 1: Composite compliance and outcomes - final model - including interaction terms between retained variables and primary predictor, composite non-compliance, noting primary predictor not significant in final model   
model_composite_interact1 <- glm(formula = composite_outcome1_365 ~ non_compliance_ab1vte1 + joint + vte_dopp_pt + vte_1stmobil_day1 + cc_kd + arth_meds_nsaids + cc_gord + cc_msk + cc_stroke + non_compliance_ab1vte1*joint + non_compliance_ab1vte1*vte_dopp_pt + non_compliance_ab1vte1*vte_1stmobil_day1 + non_compliance_ab1vte1*cc_kd + non_compliance_ab1vte1*arth_meds_nsaids + non_compliance_ab1vte1*cc_gord + non_compliance_ab1vte1*cc_msk + non_compliance_ab1vte1*cc_stroke, family = "binomial", data = comp_data)
summary(model_composite_interact1)
# None significant 

##  Analysis 2: VTE non-compliance and VTE outcomes - final model - including interaction terms between retained variables and primary predictor, VTE non-compliance
model_vte_interact1 <-glm(formula = cc_vte_yesno ~ non_compliance_nhmrc_vte1 + vte_dopp_pt + joint + vte_1stmobil_day1 + bmi + cc_hd + age + ASA + cc_msk + vte_prev_vte + edu + cc_dep + non_compliance_nhmrc_vte1*vte_dopp_pt + non_compliance_nhmrc_vte1*joint + non_compliance_nhmrc_vte1*vte_1stmobil_day1 + non_compliance_nhmrc_vte1*bmi + non_compliance_nhmrc_vte1*cc_hd + non_compliance_nhmrc_vte1*age + non_compliance_nhmrc_vte1*ASA + non_compliance_nhmrc_vte1*cc_msk + non_compliance_nhmrc_vte1*vte_prev_vte + non_compliance_nhmrc_vte1*edu + non_compliance_nhmrc_vte1*cc_dep, family = "binomial", data = comp_data)
summary(model_vte_interact1)
# None significant 

## Analysis 3.1: Antibiotic non-compliance and all SSI outcomes - final model - including interaction terms between retained variables and primary predictor, antibiotic non-compliance
model_ssi_interact1 <-glm(formula = cc_type_jtinfect ~ non_compliance_tg_ab1 + non_compliance_nhmrc_vte1 + arth_meds_antidep + arth_meds_nsaids + bilat + bld_transf + bmi + cc_lud + cc_nc + cc_sleep_apnoea + cc_stroke + hosp_pub_priv + joint + surg_duration + non_compliance_tg_ab1*non_compliance_nhmrc_vte1 + non_compliance_tg_ab1*arth_meds_antidep + non_compliance_tg_ab1*arth_meds_nsaids + non_compliance_tg_ab1*bilat + non_compliance_tg_ab1*bld_transf + non_compliance_tg_ab1*bmi + non_compliance_tg_ab1*cc_lud + non_compliance_tg_ab1*cc_nc + non_compliance_tg_ab1*cc_sleep_apnoea + non_compliance_tg_ab1*cc_stroke + non_compliance_tg_ab1*hosp_pub_priv + non_compliance_tg_ab1*joint + non_compliance_tg_ab1*surg_duration, family = "binomial", data = comp_data)
summary(model_ssi_interact1)
# None significant 

##  Analysis 3.2: Antibiotic non-compliance and deep SSI outcomes - final model - including interaction terms between retained variables and primary predictor, antibiotic non-compliance, noting primary predictor not significant in final model
model_deep_interact1 <-glm(formula = deep_jtinfect ~ non_compliance_tg_ab1 + bmi + cc_nc + cc_stroke + joint + smoker + non_compliance_tg_ab1*bmi + non_compliance_tg_ab1*cc_nc + non_compliance_tg_ab1*cc_stroke + non_compliance_tg_ab1*joint + non_compliance_tg_ab1*smoker, family = "binomial", data = comp_data) 
summary(model_deep_interact1)
# No significant interaction terms

# No further exploratory analyses required - no significant interaction terms

```