The following code will show you the preliminary statistics ran before conducting regression analyses. This page includes:

Read in data, create new variables, clean data, and prepare data for analysis
Create visualiations depicting number of health conditions in each level of physical activity
Run exploratory data analysis examining the differences in sociodemographic & military factors based upon physical activity level.
Create table displaying results of descriptive statistics (chi squaures, t tests, anovas) based upon physical activity level.

options(knitr.table.format = "html") 

#data stuff
library(tidyverse)
library(dplyr)
  #CSV/CAV files
library(haven)
library(tableone)
  #regression stuff
library(aod)
library(gtsummary)
library(lmtest)
library(DescTools)
library(manipulate)
library(psych)
library(tableone)
library(kableExtra)

Prep Data For Analyses

Read and Select data

### Import dataset 
NHRVS_2019 <- read_sav("~/Desktop/Adams_lab/SPSS_stuff/NHRVS_2019_2020_FINAL_weighted.sav")

### Select and rename Variables
thesis_NHRVS <- dplyr::select(NHRVS_2019,
    #Background Variables __________
      caseid,weight,Age,Male_Gender,White_Race,SomeCollege_or_Higher,
      Married_Partnered,Income_60k_plus,YN_work,
      PCS_FINAL,MCS_FINAL,Weight_kg,BMI,
    #Military variables__________
      Combat_Veteran,Years_in_Military,Branch_5cat,Enlistment_status_3cat,
      MilitaryRank_3cat,
    #Mental Health Variables___________
      TOTAL_TRAUMAS,LT_PCL_33,LT_MDD_Sx_only,LT_Suicide_4cat,Suicide_Attempt_LT,
      LT_Suicide_Plan,LT_AUD,LT_NicotineDep,LT_DUD,tot_MHC,Any_MHC,
    #Physical health Variables_________
      ANY_ADL_DISABILITY,ANY_IADL_DISABILITY,Any_Disability,tot_phc,Any_PHC,Q22A_1,Q22A_2,
      Q22A_3,Q22A_4,Q22A_5,Q22A_6,Q22A_7,Q22A_8,Q22A_9,Q22A_10,Q22A_11,Q22B_1,
      Q22B_2,Q22B_3,Q22B_4,Q22B_5,
    #Exercise variables ___________
      godin_mild_winsor,godin_Mod_winsor,godin_Stren_winsor,HCS,HCS_Cats,
      godin_total_ac,god3cat,god2cat,GODIN_outliers)
    
#Rename variabes
thesis_NHRVS <- rename(thesis_NHRVS,
      MET_total = godin_total_ac,
      God_mild = godin_mild_winsor,
      God_mod = godin_Mod_winsor,
      God_stren = godin_Stren_winsor,
      College = SomeCollege_or_Higher,
      MCS = MCS_FINAL,
      PCS = PCS_FINAL,
      Arthritis = Q22A_1,
      Athsma =  Q22A_2,
      Cancer = Q22A_3,
      Chron_pain = Q22A_4,
      Liv_dis = Q22A_5,
      Diabetes = Q22A_6,
      Hrt_dis = Q22A_7,
      Hrt_atk = Q22A_8,
      High_chol = Q22A_9,
      High_bld_press = Q22A_10,
      Kid_dis = Q22A_11,
      Migrane = Q22B_1,
      MS = Q22B_2,
      Osteoporosis = Q22B_3,
      Rhum_arth = Q22B_4,
      Stroke = Q22B_5,
      LT_MDD = LT_MDD_Sx_only,
      LT_PTSD = LT_PCL_33
)

Create variables, rename stuff, and remove missing cases

thesis_NHRVS<- thesis_NHRVS %>%
  # First Create all of the variables
  mutate(thesis_NHRVS, 
    # Exercise variables
         Ex_time = (God_mild + God_mod + God_stren)*15,
            # Tells us how long people exercised for.
         MET_min = MET_total* 15,
         MET_min_W = ifelse(MET_min > 2000, 2000, MET_min),
            # Total METs per week times the duration of exercise
         Ex_rec = as.factor(ifelse(MET_min >= 500, "Sufficient", "Insufficient")),
            # Meeting activity levels (i.e., sufficient vs insufficient)
         Ex_3cat = as.factor(ifelse(MET_min < 290, 1,
                              ifelse(MET_min < 500 & MET_min >= 290, 2,3))),
         Ex_4cats = as.factor(ifelse(MET_min < 290, "Sed",
                              ifelse(MET_min < 500 & MET_min >= 290, "Mod",
                              ifelse(MET_min < 1000 & MET_min >= 500, "Active", 
                              ifelse(MET_min < 2000 & MET_min >= 1000, "Super", NA))))),
# 3 godin (HCS) categories converted to MET_min 
    # Other Variables 
         HCS_win = (God_mod*5) + (God_stren*9),
         HCS3cat = as.factor(ifelse(HCS_win < 14, "Insufficient",
                              ifelse(HCS_win < 24 & HCS_win >= 290, "Moderate", "Active"))),
         Yr5_Military = as.factor(ifelse(Years_in_Military >= 5, "5+", "<5")),
         Active_PA = as.factor(ifelse(Ex_3cat == 3,1,0)),
         Moderate_PA = as.factor(ifelse(Ex_3cat == 2,1,0)),
         Insuf_PA = as.factor(ifelse(Ex_3cat == 2,1,0)),
         Total_HC = Any_Disability + Arthritis + Cancer + Chron_pain + Liv_dis +
                    Diabetes + Hrt_dis + Hrt_atk + High_chol + High_bld_press +
                    Kid_dis + Migrane + MS + Osteoporosis + Rhum_arth + Stroke +
                    LT_MDD + LT_PTSD + LT_AUD + LT_DUD + LT_NicotineDep,
         Total_PHC = Any_Disability + Arthritis + Cancer + Chron_pain + Liv_dis +
                    Diabetes + Hrt_dis + Hrt_atk + High_chol + High_bld_press +
                    Kid_dis + Migrane + MS + Osteoporosis + Rhum_arth + Stroke,
         Total_MHC = LT_MDD + LT_PTSD + LT_AUD + LT_DUD + LT_NicotineDep,
         No_Condition = ifelse(Any_MHC == 1 | Any_PHC ==1, 0,1)
    
    ) %>%

mutate(thesis_NHRVS,
      #Physical health variables
           Any_Disability = as.factor(Any_Disability),
           Arthritis = as.factor(Arthritis),
           Athsma = as.factor(Athsma),
           Cancer = as.factor(Cancer),
           Chron_pain = as.factor(Chron_pain),
           Liv_dis = as.factor(Liv_dis),
           Diabetes = as.factor(Diabetes),
           Hrt_dis = as.factor(Hrt_dis),
           Hrt_atk = as.factor(Hrt_atk),
           High_chol = as.factor(High_chol),
           High_bld_press = as.factor(High_bld_press),
           Kid_dis = as.factor(Kid_dis),
           Migrane = as.factor(Migrane),
           MS = as.factor(MS),
           Osteoporosis = as.factor(Osteoporosis),
           Rhum_arth = as.factor(Rhum_arth),
           Stroke = as.factor(Stroke),
           Ex_3cat = as.factor(Ex_3cat),  
           Male_Gender = as.factor(Male_Gender), 
           Married_Partnered = as.factor(Married_Partnered),
           White_Race = as.factor(White_Race),
           College = as.factor(College),
           Income_60k_plus = as.factor(Income_60k_plus),
           YN_work = as.factor(YN_work),
           Combat_Veteran = as.factor(Combat_Veteran),
           Any_MHC = as.factor(Any_MHC),
           Any_PHC = as.factor(Any_PHC),
           god2cat = as.factor(god2cat),
      #Mental health variables
          LT_Suicide_4cat = as.factor(LT_Suicide_4cat),
          LT_MDD = as.factor(LT_MDD),
          LT_PTSD = as.factor(LT_PTSD),
          LT_AUD = as.factor(LT_AUD),
          LT_DUD = as.factor(LT_DUD),
          LT_NicotineDep = as.factor(LT_NicotineDep)
      #Create a non health condition variable 
)%>%
mutate(thesis_NHRVS, 
        Ex_3cat = recode(Ex_3cat,
                         "1" = "Insufficient",
                         "2" = "Moderate",
                         "3" = "Active"),
        Married_Partnered = recode(Married_Partnered,
                         "0" = "Married_Partnered",
                         "1" = "Single"),
        Male_Gender = as.factor(Male_Gender),
        Male_Gender = recode(Male_Gender,
                         "0" = "Female",
                         "1" = "Male"),
        White_Race = recode(White_Race,
                         "0" = "Not_White",
                         "1" = "White"),
        College = recode(College,
                         "0" = "No_Colege",
                         "1" = "Some_Colege"),
        Income_60k_plus = recode(Income_60k_plus,
                         "0" = "Under_60k",
                         "1" = "OVer_60k"),
        YN_work = recode(YN_work,
                         "1" = "No_Work",
                         "2" = "Working"),
        Combat_Veteran = as.factor(Combat_Veteran),
        Combat_Veteran = recode(Combat_Veteran,
                         "0" = "No_Combat",
                         "1" = "Combat"),
        Any_MHC = recode(Any_MHC,
                         "0" = "No_con",
                         "1" = "MHC"),
        Any_PHC = recode(Any_PHC,
                         "0" = "No_con",
                         "1" = "PHC"),
        LT_Suicide_4cat = as.factor(LT_Suicide_4cat),
        LT_Suicidal = recode(LT_Suicide_4cat,
                         "0" = "0",
                         "1" = "0",
                         "2" = "1",
                         "3" = "1"
                             ))
#Filter out NA values
thesis_NHRVS <- thesis_NHRVS %>%
  filter(!is.na(Ex_rec)) %>%
  filter(!is.na(Ex_3cat)) %>%
  filter(!is.na(MCS)) %>%
  filter(!is.na(PCS)) %>%
  filter(!is.na(BMI)) %>%
  filter(!is.na(Any_Disability)) %>%
  filter(!is.na(Arthritis)) %>%
  filter(!is.na(Cancer)) %>%
  filter(!is.na(Chron_pain)) %>%
  filter(!is.na(Liv_dis)) %>%
  filter(!is.na(Diabetes)) %>%
  filter(!is.na(Hrt_dis)) %>%
  filter(!is.na(Hrt_atk)) %>%
  filter(!is.na(High_chol)) %>%
  filter(!is.na(High_bld_press)) %>%
  filter(!is.na(Kid_dis)) %>%
  filter(!is.na(Migrane)) %>%
  filter(!is.na(MS)) %>%
  filter(!is.na(Osteoporosis)) %>%
  filter(!is.na(Rhum_arth)) %>%
  filter(!is.na(Stroke)) %>%
  filter(!is.na(LT_MDD)) %>%
  filter(!is.na(LT_PTSD)) %>%
  filter(!is.na(LT_AUD)) %>%
  filter(!is.na(LT_DUD)) %>%
  filter(!is.na(LT_NicotineDep)) %>%
  filter(!is.na(LT_Suicidal))
 
kable(thesis_NHRVS[1:10,1:8]) %>%
   kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

caseid	weight	Age	Male_Gender	White_Race	College	Married_Partnered	Income_60k_plus
3592	2.6012	58	Male	White	No_Colege	Single	Under_60k
2182	0.2979	64	Female	White	Some_Colege	Single	Under_60k
1311	1.4863	54	Male	Not_White	No_Colege	Married_Partnered	Under_60k
2054	0.8510	56	Male	White	Some_Colege	Married_Partnered	Under_60k
1061	1.5495	58	Male	Not_White	Some_Colege	Single	Under_60k
138	0.6388	88	Male	White	Some_Colege	Single	Under_60k
825	0.6042	55	Male	Not_White	Some_Colege	Single	Under_60k
1971	1.0539	36	Female	Not_White	Some_Colege	Single	Under_60k
1199	0.5370	69	Male	White	Some_Colege	Single	Under_60k
3994	0.7778	64	Male	White	Some_Colege	Married_Partnered	OVer_60k

Export new file to CSV

Now that we have a clean and prepped data file to analyze lets create a CSV of that for future reference.

write_csv(thesis_NHRVS, "~/Desktop/Coding/data/thesis_dataset.csv" )
haven::write_sav(thesis_NHRVS,"~/Desktop/Coding/data/thesis_SPSS.sav")

Descriptives of Variables

Discriptive Table

Here are some discriptive statistics and basic plots to see an overview of the data. I use table1() to run the descriptive statistics.

# Physical activity level value (MET minutes)
#install.packages("table1")

desc_tab<-data.frame(table1::table1(~MET_min_W + tot_MHC + tot_phc | Ex_3cat, data=thesis_NHRVS))
 knitr::kable(desc_tab) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

X.	Insufficient	Moderate	Active	Overall
	(N=1427)	(N=724)	(N=1397)	(N=3548)
MET_min_W
Mean (SD)	114 (98.2)	390 (66.8)	967 (389)	506 (462)
Median [Min, Max]	135 [0, 270]	405 [315, 495]	855 [540, 2000]	360 [0, 2000]
tot_MHC
Mean (SD)	1.21 (1.33)	1.02 (1.20)	1.04 (1.22)	1.11 (1.26)
Median [Min, Max]	1.00 [0, 6.00]	1.00 [0, 6.00]	1.00 [0, 6.00]	1.00 [0, 6.00]
tot_phc
Mean (SD)	3.15 (2.11)	2.76 (1.80)	2.53 (1.86)	2.82 (1.98)
Median [Min, Max]	3.00 [0, 14.0]	3.00 [0, 9.00]	2.00 [0, 10.0]	3.00 [0, 14.0]

Violin Plots

#Plotting Met mins and Physical activity
ggplot(thesis_NHRVS, aes(x = Ex_3cat, y = MET_min_W)) +
  geom_violin()+
  geom_boxplot(width=0.1, color="blue", alpha=0.2)+
  ggtitle("Met minutes per physical activity level")

# Total Mental health conditions
ggplot(thesis_NHRVS, aes(x = Ex_3cat, y = tot_MHC)) +
  geom_violin()+
  geom_boxplot(width=0.1, color="blue", alpha=0.2) +
  ggtitle("Total number of mental health conditions by PA level")

# Total physical health conditions
ggplot(thesis_NHRVS, aes(x = Ex_3cat, y = tot_phc)) +
  geom_violin()+
  geom_boxplot(width=0.1, color="blue", alpha=0.2)+
  ggtitle("Total number of physical health conditions by PA level")

These descriptive are helpful for us to see how the data looks. Also it can be helpful to see what may be or not be wrong within the data

Stacked Barcharts of health conditions

Cross tabs

I wanted to create a visualiation that depicted the number of veterans with or without health conditions within each physical activity level. The following code uses xtabs() to run cross tab/ chi squares. I start by extracting the number of veterans, and then convert the table into long data to plot the figure.

Extracting values from chi square tables.

#table with and without health conditions
  AD_xtab2 <- xtabs(~Any_Disability +Ex_3cat,data = thesis_NHRVS)
  xtable2  <- rbind(AD_xtab2,xtabs(~Athsma+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Arthritis+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Cancer+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Chron_pain+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Liv_dis+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Diabetes+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Hrt_dis+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Hrt_atk+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~High_chol+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~High_bld_press+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Kid_dis+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Migrane+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~MS+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Osteoporosis+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Rhum_arth+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~Stroke+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~LT_AUD+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~LT_DUD+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~LT_MDD+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~LT_PTSD+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~LT_NicotineDep+Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~LT_Suicidal +Ex_3cat,data = thesis_NHRVS))
  xtable2  <- rbind(xtable2,xtabs(~No_Condition+Ex_3cat,data = thesis_NHRVS))
xtable2<-data.frame(xtable2)
#create a varaible for diagnosis
xtable2$condition<- NA
diagnose<- c("No Condition","Condition")
xtable2$condition = diagnose
knitr::kable(xtable2[1:10,]) %>%
   kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

	Insufficient	Moderate	Active	condition
X0	1133	637	1264	No Condition
X1	294	87	133	Condition
X0.1	1199	645	1247	No Condition
X1.1	228	79	150	Condition
X0.2	830	444	847	No Condition
X1.2	597	280	550	Condition
X0.3	1131	577	1098	No Condition
X1.3	296	147	299	Condition
X0.4	992	563	1079	No Condition
X1.4	435	161	318	Condition

Create long dataset

Next I manually create a long dataset. I start by creating the variable names needed for the long dataset.

#2. Varaible names for table 
 var_names2 = c("Any Disability","Any Disability","Asthsma","Asthsma",
                "Arthritis", "Arthritis",
                "Cancer","Cancer",
                "Chron pain","Chron pain",
                "Liver Disease","Liver Disease",
                "Diabetes","Diabetes",
                "Heart Disease", "Heart Disease",
                "Heart Attack", "Heart Attack",
                "High Cholesterol", "High Cholesterol",
                "Hypertension", "Hypertension", 
                "Kidney Disease", "Kidney Disease",
                "Migraine",  "Migraine", 
                "MS", "MS", 
                "Osteoporosis", "Osteoporosis",
                "Rheumatoid Arthritis", "Rheumatoid Arthritis",
                "Stroke","Stroke",
                "AUD", "AUD", 
                "DUD","DUD",
                "MDD", "MDD", 
                "PTSD", "PTSD",
                "Nicotine Dep", "Nicotine Dep",
                "Suicidality","Suicidality",
                "No Condition", "No Condition")


#3. make long data frame (needed for plot)
      condition_df2          <- var_names2
      condition_df2 <- data.frame(condition_df2) 
      condition_df2[1:144,1]  <- var_names2              
      condition_df2[1:48,2]  <- xtable2$Insufficient
      condition_df2[1:48,3]  =  "Insufficient"
      condition_df2[49:96,2] <- xtable2$Moderate
      condition_df2[49:96,3] <- "Moderate"
      condition_df2[97:144,2] <- xtable2$Active
      condition_df2[97:144,3] =  "Sufficient"
      condition_df2$Diagnosis = xtable2$condition
      names(condition_df2) <- c("Variable","Value","Ex_3cat","Diagnosis")
      
      knitr::kable(condition_df2[1:10,]) %>%
         kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

Variable	Value	Ex_3cat	Diagnosis
Any Disability	1133	Insufficient	No Condition
Any Disability	294	Insufficient	Condition
Asthsma	1199	Insufficient	No Condition
Asthsma	228	Insufficient	Condition
Arthritis	830	Insufficient	No Condition
Arthritis	597	Insufficient	Condition
Cancer	1131	Insufficient	No Condition
Cancer	296	Insufficient	Condition
Chron pain	992	Insufficient	No Condition
Chron pain	435	Insufficient	Condition

Stacked Barchart

Now that we have the data we can plot it! I use ggplot() to plot the figure. As this is a bar chart I use geom_bar() and use position="stack" to make the stacked bar chart. I used fct_reorder() to order the chart in order of the number of veterans reporting that condition. facet_grid() was used to seperate the the number of veterans with and without conditions into different figures. Lastly of note, I used coord_flip() to put both of the figures on the Y axis.

ggplot(condition_df2, aes(fill=Ex_3cat, y=Value, x= fct_reorder(Variable, Value))) + 
  geom_bar(position="stack", stat="identity")+
  facet_grid(~ Diagnosis)+ 
  ylab('Number of Veterans') +
  xlab('Health Condition')+
  labs(fill='Physical Activity') +
  theme_classic()+
  theme(legend.position="none")+
  coord_flip()

While this figure conveys what I want it to, it doesn’t really help me interpret anything and looks bad in my opinion. I wanted to have both stacked bar charts on opposite sides of the variable names. Also I was unable to figure out how to reverse the X axis so I went into Illustrator and made the figure as I wanted it to look. Here is the final figure!

Note. Stacked bar chart of the Number of veterans within each physical activity level between all health conditions. Total sample is observed between the left side of the figure (i.e., veterans who did not report health condition), and the right (i.e, veterans who did report a health condition) RA = Rheumatoid Arthritis, MS = Multiple Sclerosis, AUD = Alcohol Use Disorder, DUD = Drug Use Disorder, MDD = Major Depressive Disorder, Nicotine Dep = Nicotine Dependence, PTSD = Posttraumatic Stress Disorder. No Condition is reverse coded on the no condition figure (i.e., having a health condition).

Exploratory Data Analysis (EDA)

Associations With Exercsie

The first step in our analysis is to figure out what sociodemographic / military characteristics are associated with differences in physical activity level (i.e., our dependent variable). I use CreateTableOne() to run the analyses as it will put everything in super nice table for me automatically. This table till tell me which covaaraites I need to use for my analysis.

library(tableone)
EX_Vars <- c("Age", "Male_Gender", "White_Race", "College",     
                 "Married_Partnered", "Income_60k_plus", "YN_work", "PCS", 
                 "MCS", "Weight_kg", "BMI", "Combat_Veteran", 
                 "Yr5_Military", "TOTAL_TRAUMAS", "tot_phc", 
                 "tot_MHC")

EX_catVars     <- c("Male_Gender", "White_Race", "College",     
                 "Married_Partnered", "Income_60k_plus", "YN_work", "Combat_Veteran", 
                 "Yr5_Military","Ex_3cat")
Exercise_EDA <- CreateTableOne(vars = EX_Vars, strata = "Ex_3cat", data = thesis_NHRVS, 
               factorVars = EX_catVars)


full_sample<- CreateTableOne(vars = EX_Vars, data = thesis_NHRVS, 
               factorVars = EX_catVars)
full_sample <- print(full_sample, formatOptions = list(big.mark = ","))
test_tab <- print(Exercise_EDA, formatOptions = list(big.mark = ","))
Demo_tab <- cbind(full_sample, test_tab[,1:4])

 knitr::kable(Demo_tab) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

	Overall	Insufficient	Moderate	Active	p
n	3,548	1,427	724	1,397
Age (mean (SD))	66.25 (12.98)	67.19 (12.48)	67.15 (12.65)	64.84 (13.51)	<0.001
Male_Gender = Male (%)	3105 (87.5)	1231 (86.3)	643 (88.8)	1231 (88.1)	0.164
White_Race = White (%)	2903 (81.8)	1172 (82.1)	591 (81.6)	1140 (81.6)	0.926
College = Some_Colege (%)	3096 (87.3)	1197 (83.9)	624 (86.2)	1275 (91.3)	<0.001
Married_Partnered = Single (%)	2514 (70.9)	978 (68.5)	514 (71.0)	1022 (73.2)	0.026
Income_60k_plus = Under_60k (%)	1458 (41.1)	659 (46.2)	284 (39.2)	515 (36.9)	<0.001
YN_work = Working (%)	2124 (59.9)	928 (65.0)	429 (59.3)	767 (54.9)	<0.001
PCS (mean (SD))	46.65 (9.49)	43.48 (10.47)	47.79 (8.52)	49.30 (7.81)	<0.001
MCS (mean (SD))	53.16 (7.68)	52.35 (8.37)	53.59 (6.93)	53.77 (7.22)	<0.001
Weight_kg (mean (SD))	90.16 (18.93)	92.96 (20.36)	90.14 (17.92)	87.31 (17.46)	<0.001
BMI (mean (SD))	29.05 (5.49)	30.08 (6.01)	28.99 (5.29)	28.04 (4.82)	<0.001
Combat_Veteran = Combat (%)	1206 (34.0)	463 (32.5)	242 (33.4)	501 (35.9)	0.147
Yr5_Military = 5+ (%)	1296 (36.5)	494 (34.6)	262 (36.2)	540 (38.7)	0.082
TOTAL_TRAUMAS (mean (SD))	9.68 (8.28)	9.00 (7.86)	10.01 (8.50)	10.22 (8.54)	<0.001
tot_phc (mean (SD))	2.82 (1.98)	3.15 (2.11)	2.76 (1.80)	2.53 (1.86)	<0.001
tot_MHC (mean (SD))	1.11 (1.26)	1.21 (1.33)	1.02 (1.20)	1.04 (1.22)	<0.001

This table lets us see differences between all of the variables for the exercise categories. If you look to the right of the Moderate category, you’ll see the p-value for the tests done. The default test used by CreateTableOne are chi-squares for categorical variables and a t-test for continuous variables.

These variables should be used for the regressions/ancovas - Age,Some_college_or_higher, Income_60K_plus, YN_work, PCS_final or MCS_final,BMI

Associations With Health Conditions

Next I am going to look at how covaaraites relate to both physical health and mental health conditions. I am going to repeat the same analysis as above with CreateTableOne() except I am changing strata = to any mental health condition ("Any_MHC) or any physical health condition (Any_PHC).

MH_Vars <- c("Age", "Male_Gender", "White_Race", "College",     
                 "Married_Partnered", "Income_60k_plus", "YN_work", "PCS", 
                 "MCS", "Weight_kg", "BMI", "Combat_Veteran", 
                 "Yr5_Military", "TOTAL_TRAUMAS", "tot_phc", 
                 "tot_MHC")

MH_catVars     <- c("Male_Gender", "White_Race", "College",     
                 "Married_Partnered", "Income_60k_plus", "YN_work",   "Combat_Veteran", 
                 "Yr5_Military","Any_MHC")
MH_EDA <- CreateTableOne(vars = MH_Vars, strata = "Any_MHC", data = thesis_NHRVS, 
               factorVars = MH_catVars)


mh_tab <- print(MH_EDA, formatOptions = list(big.mark = ","))
Demo_tab <- cbind(Demo_tab,mh_tab[,1:3])

PH_Vars <- c("Age", "Male_Gender", "White_Race", "College",     
                 "Married_Partnered", "Income_60k_plus", "YN_work", "PCS", 
                 "MCS", "Weight_kg", "BMI", "Combat_Veteran", 
                 "Yr5_Military", "TOTAL_TRAUMAS", "tot_phc", 
                 "tot_MHC")

PH_catVars     <- c("Male_Gender", "White_Race", "College",     
                 "Married_Partnered", "Income_60k_plus", "YN_work", "Combat_Veteran", 
                 "Yr5_Military","Any_PHC")

PH_EDA <- CreateTableOne(vars = MH_Vars, strata = "Any_PHC", data = thesis_NHRVS, 
               factorVars = MH_catVars)

ph_tab <- print(PH_EDA, formatOptions = list(big.mark = ","))
Demo_tab <- cbind(Demo_tab,ph_tab[,1:3])

Now that I have created the tables, I can combine these with the physical activity level. The resulting table Demo_tab will be used in the paper.

Demo_tab<- as.data.frame(Demo_tab)
kbl(Demo_tab[1:10,]) %>%
   kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

	Overall	Insufficient	Moderate	Active	p	MHC	No_con	p	No_con	PHC	p
n	3,548	1,427	724	1,397		2,048	1,500		379	3,169
Age (mean (SD))	66.25 (12.98)	67.19 (12.48)	67.15 (12.65)	64.84 (13.51)	<0.001	64.38 (13.09)	68.82 (12.37)	<0.001	56.54 (14.19)	67.42 (12.32)	<0.001
Male_Gender = Male (%)	3105 (87.5)	1231 (86.3)	643 (88.8)	1231 (88.1)	0.164	1755 (85.7)	1350 (90.0)	<0.001	302 (79.7)	2803 (88.5)	<0.001
White_Race = White (%)	2903 (81.8)	1172 (82.1)	591 (81.6)	1140 (81.6)	0.926	1663 (81.2)	1240 (82.7)	0.283	304 (80.2)	2599 (82.0)	0.430
College = Some_Colege (%)	3096 (87.3)	1197 (83.9)	624 (86.2)	1275 (91.3)	<0.001	1768 (86.3)	1328 (88.5)	0.058	335 (88.4)	2761 (87.1)	0.537
Married_Partnered = Single (%)	2514 (70.9)	978 (68.5)	514 (71.0)	1022 (73.2)	0.026	1400 (68.4)	1114 (74.3)	<0.001	267 (70.4)	2247 (70.9)	0.900
Income_60k_plus = Under_60k (%)	1458 (41.1)	659 (46.2)	284 (39.2)	515 (36.9)	<0.001	905 (44.2)	553 (36.9)	<0.001	138 (36.4)	1320 (41.7)	0.057
YN_work = Working (%)	2124 (59.9)	928 (65.0)	429 (59.3)	767 (54.9)	<0.001	1228 (60.0)	896 (59.7)	0.919	123 (32.5)	2001 (63.1)	<0.001
PCS (mean (SD))	46.65 (9.49)	43.48 (10.47)	47.79 (8.52)	49.30 (7.81)	<0.001	44.94 (9.95)	48.99 (8.25)	<0.001	52.37 (6.15)	45.97 (9.58)	<0.001
MCS (mean (SD))	53.16 (7.68)	52.35 (8.37)	53.59 (6.93)	53.77 (7.22)	<0.001	51.09 (8.84)	55.99 (4.35)	<0.001	53.39 (7.71)	53.14 (7.67)	0.552

write_csv(Demo_tab, "~/Desktop/Coding/data/Demo_tab_basic.csv")

Tests of difference

This next section is extracting the F test score, X^2 test, and t test values for the analysis above. These values will be used in the final demogrpahics table

Physical activity levels

Becasue some of the covariates are discrete variables while others are binary categorical/ non-binary categorical variables, I will be using aov() for ANOVAs, and using summary() to extract F test, t test, X^2 and scores. As there is no function to run all of these together, I manually ran the analysis on all of the covariates and combined them into one table. Example of the code used for one covaraite is below:

library(gmodels)

## Registered S3 method overwritten by 'gdata':
##   method         from     
##   reorder.factor DescTools

#### Create test of difference values and put them into a table

#Age
Age_test_act <- aov(Age~ Ex_3cat, data = thesis_NHRVS)
test_act_vars = c('Age')
Activity_val = c(summary(Age_test_act)[[1]]$`F value`[[1]])
Activity_p = c(summary(Age_test_act)[[1]]$`Pr(>F)`[[1]])

F test and chi square will be extracted for the remaining variables: - Gender, White, some college, Married_partnered, Income 60k +, Working status, physical composite score, mental composite score, BMI, 5 years + in the military, total traumas, total physical health conditions, total mental health conditions.

Now that we have extracted all of the values, we can create a data frame for all of the test of difference values. We use cbind() to combine the test of difference values with the p values.

val_df <- data.frame(test_act_vars,Activity_val)
val_df <- cbind(val_df,Activity_p)

Physical and Mental health Conditions

The same analysis will be conducted for these covariates by physical and mental health condition as well.

Combine into one table

Now that we have the test of differences for our covariates by physical activity level, and physical/ mental health conditions we can combine them into one table.

val_df <- cbind(val_df,PHC_val,PHC_p)
val_df <- cbind(val_df,MHC_val,MHC_p)

kbl(val_df) %>%
   kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

test_act_vars	Activity_val	Activity_p	PHC_val	PHC_p	PHC_val	PHC_p	MHC_val	MHC_p
Age	13.8452951	0.0000010	-14.2823113	0.0000000	-14.2823113	0.0000000	-10.2981308	0.0000000
Male	3.6197033	0.1636784	302.0000000	0.0000011	302.0000000	0.0000011	1755.0000000	0.0001263
White	0.1540486	0.9258674	0.7391466	0.3899344	0.7391466	0.3899344	1.2501630	0.2635214
College	35.5713654	0.0000000	0.4874626	0.4850613	0.4874626	0.4850613	3.7876050	0.0516335
Married_Partnered	7.3093010	0.0258705	0.0342516	0.8531726	0.0342516	0.8531726	14.6317354	0.0001307
Income_60k_plus	26.6195517	0.0000017	3.8425559	0.0499673	3.8425559	0.0499673	19.1802274	0.0000119
YN_work	30.2795478	0.0000003	132.6934452	0.0000000	132.6934452	0.0000000	0.0186681	0.8913222
PCS	151.4903468	0.0000000	17.8524197	0.0000000	17.8524197	0.0000000	-13.2171473	0.0000000
MCS	13.5297159	0.0000014	0.5927749	0.5536157	0.5927749	0.5536157	-21.7262689	0.0000000
BMI	49.9954758	0.0000000	-7.3719801	0.0000000	-7.3719801	0.0000000	5.1761500	0.0000002
Combat Veteran	3.8387085	0.1467017	6.9872209	0.0082094	6.9872209	0.0082094	2.8173357	0.0932511
5+ Years in Military	5.0053933	0.0818639	0.5483596	0.4589887	0.5483596	0.4589887	0.2501034	0.6170023
TOTAL TRAUMAS	8.4014898	0.0002290	-5.2792503	0.0000002	-5.2792503	0.0000002	14.3806833	0.0000000
Total_PHC	36.1981881	0.0000000	-97.9231626	0.0000000	-97.9231626	0.0000000	78.9721525	0.0000000
Total MHC	9.0061062	0.0001255	-2.2197261	0.0269082	-2.2197261	0.0269082	8.6206032	0.0000000

Export to CSV

write_csv(val_df, "~/Desktop/Coding/data/thesis_test_dif.csv" )

Preliminary Statistics