- Notifications
You must be signed in to change notification settings - Fork16
🎓 Tidy tools for academics
License
Unknown, MIT licenses found
Licenses found
mkearney/tidyversity
Folders and files
| Name | Name | Last commit message | Last commit date | |
|---|---|---|---|---|
Repository files navigation
🎓 Tidy tools foracademics
Install the development version fromGithub with:
## install devtools if not alreadyif (!requireNamespace("devtools",quietly=TRUE)) { install.packages("devtools")}## install tidyversity from Githubdevtools::install_github("mkearney/tidyversity")
Load the package (it, of course, plays nicely with tidyverse).
## load tidyverselibrary(tidyverse)#> ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.2.1 ──#> ✔ ggplot2 2.2.1 ✔ purrr 0.2.4#> ✔ tibble 1.4.2 ✔ dplyr 0.7.4#> ✔ tidyr 0.8.0 ✔ stringr 1.3.0#> ✔ readr 1.1.1 ✔ forcats 0.3.0#> ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──#> ✖ dplyr::filter() masks stats::filter()#> ✖ dplyr::lag() masks stats::lag()## load tidyversitylibrary(tidyversity)
Conduct an Ordinary Least Squares (OLS) regression analysis.
polcom %>% tidy_regression(follow_trump~news_1+ambiv_sexism_1) %>% tidy_summary()#> # A tidy model#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1#> Model type : Ordinary Least Squares (OLS) regression#> Model pkg::fun : stats::lm()#> Model data : 243 (observations) X 3 (variables)#> $fit#> fit_stat n df estimate p.value stars#> F 243 2 3.831 0.023 *#> R^2 243 - 0.031 -#> Adj R^2 243 - 0.023 -#> RMSE 243 - 0.409 -#> AIC 243 - 260.148 -#> BIC 243 - 274.121 -#>#> $coef#> term est s.e. est.se p.value stars std.est#> (Intercept) 0.745 0.097 7.692 <.001 *** <.001#> news_1 0.022 0.012 1.811 0.071 + 0.048#> ambiv_sexism_1 -0.038 0.021 -1.870 0.063 + -0.050
Conduct a logistic regression analysis for binary (dichotomous)outcomes.
polcom %>% tidy_regression(follow_trump~news_1+ambiv_sexism_1,type="logistic") %>% tidy_summary()#> # A tidy model#> Model formula : follow_trump ~ news_1 + ambiv_sexism_1#> Model type : Logistic regression#> Model pkg::fun : stats::glm()#> Model data : 243 (observations) X 3 (variables)#> $fit#> fit_stat n df estimate p.value stars#> χ2 243 240 247.442 0.357#> Δχ2 243 2 7.466 0.024 *#> Nagelkerke R^2 243 - 0.030 -#> McFadden R^2 243 - 0.029 -#> RMSE 243 - 2.540 -#> AIC 243 - 253.442 -#> BIC 243 - 263.921 -#>#> $coef#> term est s.e. est.se p.value stars std.est#> (Intercept) 1.133 0.553 2.049 0.040 * <.001#> news_1 0.127 0.070 1.808 0.071 + 0.195#> ambiv_sexism_1 -0.229 0.122 -1.872 0.061 + -0.201
Conduct a poisson regression analysis for count data.
polcom %>% mutate(polarize= abs(therm_1-therm_2)) %>% tidy_regression(polarize~news_1+ambiv_sexism_1,type="poisson") %>% tidy_summary()#> # A tidy model#> Model formula : polarize ~ news_1 + ambiv_sexism_1#> Model type : Poisson regression#> Model pkg::fun : stats::glm()#> Model data : 242 (observations) X 3 (variables)#> $fit#> fit_stat n df estimate p.value stars#> χ2 242 239 6549.419 <.001 ***#> Δχ2 242 2 399.077 <.001 ***#> Nagelkerke R^2 242 - 0.808 -#> McFadden R^2 242 - 0.057 -#> RMSE 242 - 0.760 -#> AIC 242 - 7725.222 -#> BIC 242 - 7735.689 -#>#> $coef#> term est s.e. est.se p.value stars std.est#> (Intercept) 3.798 0.038 99.448 <.001 *** <.001#> news_1 0.045 0.005 9.358 <.001 *** 0.881#> ambiv_sexism_1 -0.126 0.008 -15.852 <.001 *** -2.230
Conduct a negative binomial regression analysis for overdispersed countdata.
polcom %>% mutate(polarize= abs(therm_1-therm_2)) %>% tidy_regression(polarize~news_1+ambiv_sexism_1,type="negbinom") %>% tidy_summary()#> # A tidy model#> Model formula : polarize ~ news_1 + ambiv_sexism_1#> Model type : Negative binomial regression#> Model pkg::fun : MASS::glm.nb()#> Model data : 242 (observations) X 3 (variables)#> $fit#> fit_stat n df estimate p.value stars#> χ2 242 239 293.328 0.009 **#> Δχ2 242 2 8.440 0.015 *#> Nagelkerke R^2 242 - 0.034 -#> McFadden R^2 242 - 0.028 -#> RMSE 242 - 0.761 -#> AIC 242 - 2312.391 -#> BIC 242 - 2326.347 -#>#> $coef#> term est s.e. est.se p.value stars std.est#> (Intercept) 3.741 0.258 14.510 <.001 *** 3.752#> news_1 0.053 0.032 1.632 0.103 0.113#> ambiv_sexism_1 -0.123 0.054 -2.273 0.023 * -0.158
polcom %>% mutate(polarize= abs(therm_1-therm_2)) %>% tidy_regression(polarize~news_1+ambiv_sexism_1,type="quasipoisson",robust=TRUE) %>% tidy_summary()#> # A tidy model#> Model formula : polarize ~ news_1 + ambiv_sexism_1#> Model type : [Robust] Poisson regression#> Model pkg::fun : robust::glmRob()#> Model data : 242 (observations) X 3 (variables)#> $fit#> fit_stat n df estimate p.value stars#> χ2 242 239 6989.543 <.001 ***#> Δχ2 242 2 58782.937 <.001 ***#> Nagelkerke R^2 242 - 1.000 -#> McFadden R^2 242 - 0.894 -#> RMSE 242 - 31.865 -#> AIC 242 - 2245.147 -#> BIC 242 - 2259.103 -#>#> $coef#> term est s.e. est.se p.value stars std.est#> (Intercept) 3.705 0.071 51.968 <.001 *** <.001#> news_1 0.079 0.010 8.325 <.001 *** 1.279#> ambiv_sexism_1 -0.241 0.022 -11.179 <.001 *** -2.086
Conduct an analysis of variance (ANOVA).
polcom %>% mutate(sex= ifelse(sex==1,"Male","Female"),vote_choice= case_when(vote_2016_choice==1~"Clinton",vote_2016_choice==2~"Trump",TRUE~"Other")) %>% tidy_anova(pp_party~sex*vote_choice) %>% tidy_summary()#> # A tidy model#> Model formula : pp_party ~ sex * vote_choice#> Model type : Analysis of variance (ANOVA)#> Model pkg::fun : stats::aov()#> Model data : 243 (observations) X 3 (variables)#> $fit#> fit_stat n df estimate p.value stars#> F 243 5 53.327 <.001 ***#> R^2 243 - 0.529 -#> Adj R^2 243 - 0.519 -#> RMSE 243 - 1.238 -#> AIC 243 - 801.115 -#> BIC 243 - 825.567 -#>#> $coef#> term est s.e. est.se statistic p.value stars std.est#> sex 1.000 19.238 19.238 12.561 <.001 *** 2.000#> vote_choice 2.000 388.606 194.303 126.867 <.001 *** 2.000#> sex:vote_choice 2.000 0.519 0.259 0.169 0.844 2.000#> Residuals 237.000 362.978 1.532 - - 237.000
polcom %>% tidy_ttest(pp_ideology~follow_trump) %>% tidy_summary()#> # A tidy model#> Model formula : pp_ideology ~ follow_trump#> Model type : T-test#> Model pkg::fun : stats::t.test()#> Model data : 244 (observations)#> $fit#> group df mean diff lo.95 hi.05#> FALSE 76.911 4.185 0.922 0.308 1.536#> TRUE 76.911 3.263 -0.922 -0.308 -1.536#>#> $coef#> est t p.value stars#> 0.922 2.992 0.004 **
Conduct latent variable analysis using structural equation modeling.
## mutate data and then specify and estimate modelsem1<-polcom %>% mutate(therm_2=therm_2/10,therm_1=10-therm_1/10) %>% tidy_sem_model(news=~news_1+news_2+news_3+news_4+news_5+news_6,ambiv_sexism=~ambiv_sexism_1+ambiv_sexism_2+ambiv_sexism_3+ambiv_sexism_4+ambiv_sexism_5+ambiv_sexism_6,partisan=~a*therm_1+a*therm_2,ambiv_sexism~age+sex+hhinc+edu+news+partisan) %>% tidy_sem()## print model summarysem1 %>% tidy_summary()#> # A tidy model#> Model formula : news =~ news_1 + news_2 + news_3 + news_4 + news_5 + news_6#> ambiv_sexism =~ ambiv_sexism_1 + ambiv_sexism_2 + ambiv_sexism_3 + ambiv_sexism_4 +#> ambiv_sexism_5 + ambiv_sexism_6#> partisan =~ a * therm_1 + a * therm_2#> ambiv_sexism ~ age + sex + hhinc + edu + news + partisan#> Model type : Structural Equation Model (SEM)#> Model pkg::fun : lavaan::sem()#> Model data : 235 (observations) X 18 (variables)#> $fit#> fit_stat n df estimate p.value stars#> chisq 235 127 239.579 <.001 ***#> aic 235 - 0.907 -#> bic 235 - 0.892 -#> cfi 235 - 16138.684 -#> tli 235 - 16256.310 -#> rmsea 235 - 0.061 -#> srmr 235 - 0.073 -#> R^2:ambiv_sexism 235 - 0.379 -#>#> $coef#> term est se est.se p.value stars std.est#> news =~ news_1 1.000 <.001 - - 0.173#> news =~ news_2 1.592 0.722 2.204 0.028 * 0.340#> news =~ news_3 5.069 2.095 2.419 0.016 * 0.781#> news =~ news_4 5.587 2.312 2.417 0.016 * 0.851#> news =~ news_5 3.493 1.485 2.353 0.019 * 0.520#> news =~ news_6 1.255 0.683 1.838 0.066 + 0.196#> ambiv_sexism =~ ambiv_sexism_1 1.000 <.001 - - 0.825#> ambiv_sexism =~ ambiv_sexism_2 0.942 0.067 14.043 <.001 *** 0.801#> ambiv_sexism =~ ambiv_sexism_3 0.795 0.067 11.844 <.001 *** 0.706#> ambiv_sexism =~ ambiv_sexism_4 0.743 0.064 11.647 <.001 *** 0.697#> ambiv_sexism =~ ambiv_sexism_5 0.902 0.062 14.644 <.001 *** 0.825#> ambiv_sexism =~ ambiv_sexism_6 0.904 0.064 14.185 <.001 *** 0.807#> partisan =~ therm_1 1.000 <.001 - - 0.577#> partisan =~ therm_2 1.000 <.001 - - 0.592#> ambiv_sexism ~ age -0.004 0.005 -0.824 0.410 -0.051#> ambiv_sexism ~ sex -0.271 0.130 -2.089 0.037 * -0.130#> ambiv_sexism ~ hhinc -0.021 0.023 -0.878 0.380 -0.057#> ambiv_sexism ~ edu -0.088 0.069 -1.279 0.201 -0.083#> ambiv_sexism ~ news 0.130 0.215 0.607 0.544 0.047#> ambiv_sexism ~ partisan 0.347 0.069 5.032 <.001 *** 0.592
Estimate multilevel (mixed effects) models.
lme4::sleepstudy %>% tidy_mlm(Reaction~Days+ (Days|Subject)) %>% summary()#> Linear mixed model fit by REML ['lmerMod']#> Formula: Reaction ~ Days + (Days | Subject)#> Data: .data#>#> REML criterion at convergence: 1743.6#>#> Scaled residuals:#> Min 1Q Median 3Q Max#> -3.954 -0.463 0.023 0.463 5.179#>#> Random effects:#> Groups Name Variance Std.Dev. Corr#> Subject (Intercept) 612.1 24.74#> Days 35.1 5.92 0.07#> Residual 654.9 25.59#> Number of obs: 180, groups: Subject, 18#>#> Fixed effects:#> Estimate Std. Error t value#> (Intercept) 251.41 6.82 36.84#> Days 10.47 1.55 6.77#>#> Correlation of Fixed Effects:#> (Intr)#> Days -0.138
Comes with one data set.
Consists of survey responses to demographic, background, and likert-typeattitudinal items about political communication.
print(tibble::as_tibble(polcom),n=5)#> # A tibble: 244 x 63#> follow_trump news_1 news_2 news_3 news_4 news_5 news_6 ambiv_sexism_1 ambiv_sexism_2#> * <lgl> <int> <int> <int> <int> <int> <int> <int> <int>#> 1 TRUE 8 1 1 1 1 6 3 3#> 2 TRUE 1 1 1 1 1 1 5 5#> 3 TRUE 8 1 1 1 8 1 5 4#> 4 TRUE 8 1 1 1 1 6 2 2#> 5 TRUE 6 1 2 1 1 3 4 4#> # ... with 239 more rows, and 54 more variables: ambiv_sexism_3 <int>, ambiv_sexism_4 <int>,#> # ambiv_sexism_5 <int>, ambiv_sexism_6 <int>, img1_hrc_1 <int>, img1_hrc_2 <dbl>,#> # img1_hrc_3 <int>, img1_hrc_4 <dbl>, img1_hrc_5 <int>, img1_hrc_6 <int>, img1_hrc_7 <int>,#> # img1_hrc_8 <int>, img1_hrc_9 <int>, img2_hrc_10 <int>, img2_hrc_11 <int>, img2_hrc_12 <dbl>,#> # img2_hrc_13 <int>, img2_hrc_14 <int>, img2_hrc_15 <dbl>, img1_djt_1 <int>, img1_djt_2 <dbl>,#> # img1_djt_3 <int>, img1_djt_4 <dbl>, img1_djt_5 <int>, img1_djt_6 <int>, img1_djt_7 <int>,#> # img1_djt_8 <int>, img1_djt_9 <int>, img2_djt_10 <int>, img2_djt_11 <int>, img2_djt_12 <dbl>,#> # img2_djt_13 <int>, img2_djt_14 <int>, img2_djt_15 <dbl>, pie_1 <int>, pie_2 <int>, pie_3 <int>,#> # pie_4 <int>, vote_2016 <int>, vote_2016_choice <int>, pp_ideology <int>, pp_party <int>,#> # pp_party_lean <int>, therm_1 <int>, therm_2 <int>, therm_3 <int>, therm_4 <int>, therm_5 <int>,#> # age <int>, sex <int>, gender <int>, race <int>, edu <int>, hhinc <int>
Return summary statistics in the form of a data frame(not yetadded).
## summary stats for social media use (numeric) variablessummarize_numeric(polcom_survey,smuse1:smuse3)## summary stats for respondent sex and race (categorical) variablessummarize_categorical(polcom_survey,sex,race)
Estimate Cronbach’s alpha for a set of variables.
## reliability of social media use itemscronbachs_alpha(polcom,ambiv_sexism_1:ambiv_sexism_6)#> items alpha alpha.std#> 1 ambiv_sexism_1:ambiv_sexism_6 0.904609 0.904600#> 2 -ambiv_sexism_1 0.882322 0.882225#> 3 -ambiv_sexism_2 0.884272 0.884121#> 4 -ambiv_sexism_3 0.896061 0.896218#> 5 -ambiv_sexism_4 0.897127 0.897411#> 6 -ambiv_sexism_5 0.883554 0.883420#> 7 -ambiv_sexism_6 0.881595 0.881855
About
🎓 Tidy tools for academics
Topics
Resources
License
Unknown, MIT licenses found
Licenses found
Uh oh!
There was an error while loading.Please reload this page.
Stars
Watchers
Forks
Releases
Packages0
Uh oh!
There was an error while loading.Please reload this page.
Contributors2
Uh oh!
There was an error while loading.Please reload this page.
