🎓 Tidy tools for academics
Install the development version from GitHub with:
## install devtools if not already installed
if (!requireNamespace("devtools", quietly = TRUE)) {
install.packages("devtools")
}
## install tidyversity from GitHub
devtools::install_github("mkearney/tidyversity")
Conduct an Ordinary Least Squares (OLS) regression analysis.
polcom %>%
tidy_regression(follow_trump ~ news_1 + ambiv_sexism_1) %>%
tidy_summary()
#> # A tidy model
#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1
#> Model type : Ordinary Least Squares (OLS) regression
#> Model data : 243 (observations) X 3 (variables)
#> $fit
#> # A tibble: 6 x 6
#> fit_stat n df estimate p.value stars
#> <chr> <int> <int> <dbl> <dbl> <chr>
#> 1 F 243 2 3.83 0.0230 *
#> 2 R^2 243 NA 0.0309 NA ""
#> 3 Adj R^2 243 NA 0.0229 NA ""
#> 4 RMSE 243 NA 0.409 NA ""
#> 5 AIC 243 NA 260. NA ""
#> 6 BIC 243 NA 274. NA ""
#>
#> $coef
#> # A tibble: 3 x 7
#> term est s.e. est.se p.value stars std.est
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 (Intercept) 0.745 0.0969 7.69 0. *** 0.
#> 2 news_1 0.0220 0.0122 1.81 0.0714 + 0.0479
#> 3 ambiv_sexism_1 -0.0385 0.0206 -1.87 0.0627 + -0.0495
Conduct a logistic regression analysis for binary (dichotomous) outcomes.
polcom %>%
tidy_regression(follow_trump ~ news_1 + ambiv_sexism_1, type = "logistic") %>%
tidy_summary()
#> # A tidy model
#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1
#> Model type : logistic
#> Model data : 243 (observations) X 3 (variables)
#> $fit
#> # A tibble: 7 x 6
#> fit_stat n df estimate p.value stars
#> <chr> <int> <int> <dbl> <dbl> <chr>
#> 1 χ2 243 240 247. 0.357 ""
#> 2 Δχ2 243 2 7.47 0.0239 *
#> 3 Nagelkerke R^2 243 NA 0.0303 NA ""
#> 4 McFadden R^2 243 NA 0.0293 NA ""
#> 5 RMSE 243 NA 2.54 NA ""
#> 6 AIC 243 NA 253. NA ""
#> 7 BIC 243 NA 264. NA ""
#>
#> $coef
#> # A tibble: 3 x 7
#> term est s.e. est.se p.value stars std.est
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 (Intercept) 1.13 0.553 2.05 0.0405 * 0.
#> 2 news_1 0.127 0.0702 1.81 0.0707 + 0.195
#> 3 ambiv_sexism_1 -0.229 0.122 -1.87 0.0613 + -0.201
Conduct a Poisson regression analysis for count data.
polcom %>%
mutate(polarize = abs(therm_1 - therm_2)) %>%
tidy_regression(polarize ~ news_1 + ambiv_sexism_1, type = "poisson") %>%
tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type : Poisson regression
#> Model data : 242 (observations) X 3 (variables)
#> $fit
#> # A tibble: 7 x 6
#> fit_stat n df estimate p.value stars
#> <chr> <int> <int> <dbl> <dbl> <chr>
#> 1 χ2 242 239 6549. 0. ***
#> 2 Δχ2 242 2 399. 2.20e-87 ***
#> 3 Nagelkerke R^2 242 NA 0.808 NA ""
#> 4 McFadden R^2 242 NA 0.0574 NA ""
#> 5 RMSE 242 NA 0.760 NA ""
#> 6 AIC 242 NA 7725. NA ""
#> 7 BIC 242 NA 7736. NA ""
#>
#> $coef
#> # A tibble: 3 x 7
#> term est s.e. est.se p.value stars std.est
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 (Intercept) 3.80 0.0382 99.4 0. *** 0.
#> 2 news_1 0.0447 0.00478 9.36 0. *** 0.881
#> 3 ambiv_sexism_1 -0.126 0.00797 -15.9 0. *** -2.23
Conduct a negative binomial regression analysis for overdispersed count data.
polcom %>%
mutate(polarize = abs(therm_1 - therm_2)) %>%
tidy_regression(polarize ~ news_1 + ambiv_sexism_1, type = "negbinom") %>%
tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type : Negative binomial regression
#> Model data : 242 (observations) X 3 (variables)
#> Warning: glm.fit: algorithm did not converge
#> $fit
#> # A tibble: 7 x 6
#> fit_stat n df estimate p.value stars
#> <chr> <int> <int> <dbl> <dbl> <chr>
#> 1 χ2 242 239 293. 0.00943 **
#> 2 Δχ2 242 2 8.44 0.0147 *
#> 3 Nagelkerke R^2 242 NA 0.0343 NA ""
#> 4 McFadden R^2 242 NA 0.0280 NA ""
#> 5 RMSE 242 NA 0.761 NA ""
#> 6 AIC 242 NA 2312. NA ""
#> 7 BIC 242 NA 2326. NA ""
#>
#> $coef
#> # A tibble: 3 x 7
#> term est s.e. est.se p.value stars std.est
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 (Intercept) 3.74 0.258 14.5 0. *** NA
#> 2 news_1 0.0526 0.0322 1.63 0.103 "" NA
#> 3 ambiv_sexism_1 -0.123 0.0541 -2.27 0.0230 * NA
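Estimate robust and quasi- models, e.g., a robust quasi-Poisson regression (type = "quasipoisson", robust = TRUE).
polcom %>%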
mutate(polarize = abs(therm_1 - therm_2)) %>%
tidy_regression(polarize ~ news_1 + ambiv_sexism_1, type = "quasipoisson", robust = TRUE) %>%
tidy_summary()
#> # A tidy model
#> Model formula : polarize ~ news_1 + ambiv_sexism_1
#> Model type : [Robust] Poisson regression
#> Model data : 242 (observations) X 3 (variables)
#> $fit
#> # A tibble: 7 x 6
#> fit_stat n df estimate p.value stars
#> <chr> <int> <int> <dbl> <dbl> <chr>
#> 1 χ2 242 239 6549. 0. ***
#> 2 Δχ2 242 2 399. 2.20e-87 ***
#> 3 Nagelkerke R^2 242 NA 0.808 NA ""
#> 4 McFadden R^2 242 NA 0.0574 NA ""
#> 5 RMSE 242 NA 0.760 NA ""
#> 6 AIC 242 NA 7725. NA ""
#> 7 BIC 242 NA 7736. NA ""
#>
#> $coef
#> # A tibble: 3 x 7
#> term est s.e. est.se p.value stars std.est
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 (Intercept) 3.80 0.0382 99.4 0. *** 0.
#> 2 news_1 0.0447 0.00478 9.36 0. *** 0.881
#> 3 ambiv_sexism_1 -0.126 0.00797 -15.9 0. *** -2.23
Conduct an analysis of variance (ANOVA).
polcom %>%
mutate(sex = ifelse(sex == 1, "Male", "Female"),
vote_choice = case_when(
vote_2016_choice == 1 ~ "Clinton",
vote_2016_choice == 2 ~ "Trump",
TRUE ~ "Other")) %>%
tidy_anova(pp_party ~ sex * vote_choice) %>%
tidy_summary()
#> NULL
#> $fit
#> # A tibble: 6 x 6
#> fit_stat n df estimate p.value stars
#> <chr> <int> <int> <dbl> <dbl> <chr>
#> 1 F 243 5 53.3 6.19e-37 ***
#> 2 R^2 243 NA 0.529 NA ""
#> 3 Adj R^2 243 NA 0.519 NA ""
#> 4 RMSE 243 NA 1.24 NA ""
#> 5 AIC 243 NA 801. NA ""
#> 6 BIC 243 NA 826. NA ""
#>
#> $coef
#> # A tibble: 4 x 8
#> term est s.e. est.se statistic p.value stars std.est
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 sex 1. 19.2 19.2 12.6 0.000474 *** 2.
#> 2 vote_choice 2. 389. 194. 127. 0. *** 2.
#> 3 sex:vote_choice 2. 0.519 0.259 0.169 0.844 "" 2.
#> 4 Residuals 237. 363. 1.53 NA NA "" 237.
Conduct a t-test.
polcom %>%
tidy_ttest(pp_ideology ~ follow_trump) %>%
tidy_summary()
#> # A tidy model
#> Model formula : pp_ideology ~ follow_trump
#> Model data : 244 (observations) X 2 (variables)
#> $fit
#> # A tibble: 2 x 6
#> group df mean diff lo.95 hi.05
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FALSE 76.9 4.19 0.922 0.308 1.54
#> 2 TRUE 76.9 3.26 -0.922 -0.308 -1.54
#>
#> $coef
#> # A tibble: 1 x 4
#> est t p.value stars
#> <dbl> <dbl> <dbl> <chr>
#> 1 0.922 2.99 0.00372 **
Conduct latent variable analysis using structural equation modeling.
polcom %>%
mutate(therm_2 = therm_2 / 10,
therm_1 = 10 - therm_1 / 10) %>%
tidy_sem(news =~ news_1 + news_2 + news_3 + news_4 + news_5 + news_6,
ambiv_sexism =~ ambiv_sexism_1 + ambiv_sexism_2 + ambiv_sexism_3 +
ambiv_sexism_4 + ambiv_sexism_5 + ambiv_sexism_6,
partisan =~ a*therm_1 + a*therm_2,
ambiv_sexism ~ age + sex + hhinc + edu + news + partisan) %>%
tidy_summary()
#> # A tidy model
#> Model formula : news =~ news_1 + news_2 + news_3 + news_4 + news_5 + news_6
#> ambiv_sexism =~ ambiv_sexism_1 + ambiv_sexism_2 + ambiv_sexism_3 + ambiv_sexism_4 +
#> ambiv_sexism_5 + ambiv_sexism_6
#> partisan =~ a * therm_1 + a * therm_2
#> ambiv_sexism ~ age + sex + hhinc + edu + news + partisan
#> Model data : 235 (observations) X 18 (variables)
#> $fit
#> # A tibble: 8 x 6
#> fit_stat n df estimate p.value stars
#> <chr> <int> <int> <dbl> <dbl> <chr>
#> 1 chisq 235 127 240. 6.21e-9 ***
#> 2 cfi 235 NA 0.907 NA ""
#> 3 tli 235 NA 0.892 NA ""
#> 4 aic 235 NA 16139. NA ""
#> 5 bic 235 NA 16256. NA ""
#> 6 rmsea 235 NA 0.0614 NA ""
#> 7 srmr 235 NA 0.0731 NA ""
#> 8 ambiv_sexism:R^2 235 NA 0.379 NA ""
#>
#> $coef
#> # A tibble: 20 x 7
#> term est se est.se p.value stars std.est
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 news =~ news_1 1.00 0. NA NA "" 0.173
#> 2 news =~ news_2 1.59 0.722 2.20 0.0276 * 0.340
#> 3 news =~ news_3 5.07 2.10 2.42 0.0156 * 0.781
#> 4 news =~ news_4 5.59 2.31 2.42 0.0157 * 0.851
#> 5 news =~ news_5 3.49 1.48 2.35 0.0186 * 0.520
#> 6 news =~ news_6 1.25 0.683 1.84 0.0660 + 0.196
#> 7 ambiv_sexism =~ ambiv_s… 1.00 0. NA NA "" 0.825
#> 8 ambiv_sexism =~ ambiv_s… 0.942 0.0671 14.0 0. *** 0.801
#> 9 ambiv_sexism =~ ambiv_s… 0.795 0.0671 11.8 0. *** 0.706
#> 10 ambiv_sexism =~ ambiv_s… 0.743 0.0638 11.6 0. *** 0.697
#> 11 ambiv_sexism =~ ambiv_s… 0.902 0.0616 14.6 0. *** 0.825
#> 12 ambiv_sexism =~ ambiv_s… 0.904 0.0637 14.2 0. *** 0.807
#> 13 partisan =~ therm_1 1.00 0. NA NA "" 0.577
#> 14 partisan =~ therm_2 1.00 0. NA NA "" 0.592
#> 15 ambiv_sexism ~ age -0.00421 0.00511 -0.824 0.410 "" -0.0513
#> 16 ambiv_sexism ~ sex -0.271 0.130 -2.09 0.0367 * -0.130
#> 17 ambiv_sexism ~ hhinc -0.0205 0.0233 -0.878 0.380 "" -0.0567
#> 18 ambiv_sexism ~ edu -0.0877 0.0685 -1.28 0.201 "" -0.0828
#> 19 ambiv_sexism ~ news 0.130 0.215 0.607 0.544 "" 0.0468
#> 20 ambiv_sexism ~ partisan 0.347 0.0690 5.03 0. *** 0.592
Comes with one data set.
Consists of survey responses to demographic, background, and Likert-type attitudinal items about political communication.
print(tibble::as_tibble(polcom), n = 5)
#> # A tibble: 244 x 63
#> follow_trump news_1 news_2 news_3 news_4 news_5 news_6 ambiv_sexism_1
#> * <lgl> <int> <int> <int> <int> <int> <int> <int>
#> 1 TRUE 8 1 1 1 1 6 3
#> 2 TRUE 1 1 1 1 1 1 5
#> 3 TRUE 8 1 1 1 8 1 5
#> 4 TRUE 8 1 1 1 1 6 2
#> 5 TRUE 6 1 2 1 1 3 4
#> # ... with 239 more rows, and 55 more variables: ambiv_sexism_2 <int>,
#> # ambiv_sexism_3 <int>, ambiv_sexism_4 <int>, ambiv_sexism_5 <int>,
#> # ambiv_sexism_6 <int>, img1_hrc_1 <int>, img1_hrc_2 <dbl>,
#> # img1_hrc_3 <int>, img1_hrc_4 <dbl>, img1_hrc_5 <int>,
#> # img1_hrc_6 <int>, img1_hrc_7 <int>, img1_hrc_8 <int>,
#> # img1_hrc_9 <int>, img2_hrc_10 <int>, img2_hrc_11 <int>,
#> # img2_hrc_12 <dbl>, img2_hrc_13 <int>, img2_hrc_14 <int>,
#> # img2_hrc_15 <dbl>, img1_djt_1 <int>, img1_djt_2 <dbl>,
#> # img1_djt_3 <int>, img1_djt_4 <dbl>, img1_djt_5 <int>,
#> # img1_djt_6 <int>, img1_djt_7 <int>, img1_djt_8 <int>,
#> # img1_djt_9 <int>, img2_djt_10 <int>, img2_djt_11 <int>,
#> # img2_djt_12 <dbl>, img2_djt_13 <int>, img2_djt_14 <int>,
#> # img2_djt_15 <dbl>, pie_1 <int>, pie_2 <int>, pie_3 <int>, pie_4 <int>,
#> # vote_2016 <int>, vote_2016_choice <int>, pp_ideology <int>,
#> # pp_party <int>, pp_party_lean <int>, therm_1 <int>, therm_2 <int>,
#> # therm_3 <int>, therm_4 <int>, therm_5 <int>, age <int>, sex <int>,
#> # gender <int>, race <int>, edu <int>, hhinc <int>
Return summary statistics in the form of a data frame (not yet added).
## summary stats for social media use (numeric) variables
summarize_numeric(polcom_survey, smuse1:smuse3)
## summary stats for respondent sex and race (categorical) variables
summarize_categorical(polcom_survey, sex, race)
Estimate Cronbach’s alpha for a set of variables.
## reliability of ambivalent sexism items
cronbachs_alpha(polcom, ambiv_sexism_1:ambiv_sexism_6)
#> items alpha alpha.std
#> 1 ambiv_sexism_1:ambiv_sexism_6 0.904609 0.904600
#> 2 -ambiv_sexism_1 0.882322 0.882225
#> 3 -ambiv_sexism_2 0.884272 0.884121
#> 4 -ambiv_sexism_3 0.896061 0.896218
#> 5 -ambiv_sexism_4 0.897127 0.897411
#> 6 -ambiv_sexism_5 0.883554 0.883420
#> 7 -ambiv_sexism_6 0.881595 0.881855