We plot the data and can see that there is no obvious large difference between the debt levels or scenarios.
likert.data <- d.both_completed %>%
select(high_debt_version, quality_pre_task)
likert.data$quality_pre_task <- revalue(likert.data$quality_pre_task, c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
))
likert.data$high_debt_version <- revalue(likert.data$high_debt_version, c(
"true"="High Debt",
"false"="Low Debt"
))
ggplot(likert.data, aes(x=quality_pre_task)) +
geom_bar(fill= "Light Blue") +
facet_grid(rows = vars(high_debt_version)) +
scale_y_continuous(limits = NULL, breaks = c(2,4,6,8), labels = c("2","4","6","8")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
likert.data <- d.both_completed %>%
select(scenario, quality_pre_task)
likert.data$quality_pre_task <- revalue(likert.data$quality_pre_task, c(
"-3"="Very Bad",
"-2"="Bad",
"-1"="Somewhat Bad",
"0"="Neutral",
"1"="Somewhat Good",
"2"="Good",
"3"="Very Good"
))
ggplot(likert.data, aes(x=quality_pre_task)) +
geom_bar(fill= "Light Blue") +
facet_grid(rows = vars(scenario)) +
scale_y_continuous(limits = NULL, breaks = c(2,4,6,8), labels = c("2","4","6","8")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
As the data is collected from a likert scale we will use a cumulative family, indicating that each level on the scale is an incremental step. This model is also able to fit the data well.
We include high_debt_verison
as a predictor in our model
as this variable represent the very effect we want to measure. We also
include a varying intercept for each individual to prevent the model
from learning too much from single participants with extreme
measurements.
We iterate over the model until we have sane priors.
scenario_quality.with <- extendable_model(
base_name = "scenario_quality",
base_formula = "quality_pre_task ~ 1 + high_debt_version + (1 | session)",
base_priors = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = d.both_completed,
)
prior_summary(scenario_quality.with(only_priors= TRUE))
prior_summary(scenario_quality.with(sample_prior = "only"))
pp_check(scenario_quality.with(sample_prior = "only"), nsamples = 200, type = "bars")
We choose a beta parameter priors allowing for the beta parameter to account for 100% of the effect but that is skeptical to such strong effects from the beta parameter.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 0, 2)
sim.beta <- rnorm(sim.size, 0, 1)
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100
data.frame(x = sim.beta.diff) %>%
ggplot(aes(x)) +
geom_density() +
xlim(-150, 150) +
labs(
title = "Beta parameter prior influence",
x = "Estimate with beta as % of estimate without beta",
y = "Density"
)
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
pp_check(scenario_quality.with(), nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
summary(scenario_quality.with())
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.36 0.29 0.01 1.08 1.00 2157 2375
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.90 0.55 -3.03 -0.87 1.00 4722
## Intercept[2] -0.56 0.42 -1.40 0.25 1.00 6267
## Intercept[3] 0.22 0.42 -0.59 1.03 1.00 5807
## Intercept[4] 0.62 0.43 -0.24 1.48 1.00 5701
## Intercept[5] 1.91 0.49 0.96 2.89 1.00 5077
## Intercept[6] 3.94 0.72 2.62 5.50 1.00 5851
## high_debt_versionfalse 1.38 0.51 0.38 2.40 1.00 6286
## Tail_ESS
## Intercept[1] 2746
## Intercept[2] 3602
## Intercept[3] 3569
## Intercept[4] 3310
## Intercept[5] 2753
## Intercept[6] 3086
## high_debt_versionfalse 2942
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(scenario_quality.with(), ask = FALSE)
# default prior for monotonic predictor
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
We use loo
to check some possible extensions on the
model.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
# New model(s)
scenario_quality.with("work_domain"),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("work_experience_java.s"),
scenario_quality.with("education_field"),
scenario_quality.with("mo(education_level)", edlvl_prior),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("workplace_td_tracking"),
scenario_quality.with("workplace_pair_programming"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("scenario"),
scenario_quality.with("group")
)
loo_result[2]
## $diffs
## elpd_diff se_diff
## scenario_quality.with("workplace_coding_standards") 0.0 0.0
## scenario_quality.with("work_experience_programming.s") -0.1 1.7
## scenario_quality.with("workplace_peer_review") -0.4 1.2
## scenario_quality.with("work_experience_java.s") -0.5 1.8
## scenario_quality.with() -1.1 1.3
## scenario_quality.with("mo(education_level)", edlvl_prior) -1.1 1.3
## scenario_quality.with("workplace_pair_programming") -1.6 1.4
## scenario_quality.with("scenario") -1.8 1.4
## scenario_quality.with("workplace_td_tracking") -1.8 1.4
## scenario_quality.with("education_field") -2.1 1.5
## scenario_quality.with("group") -2.5 1.3
## scenario_quality.with("work_domain") -3.1 1.7
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.3 4.1
## p_loo 8.4 0.9
## looic 162.7 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1548
## (0.5, 0.7] (ok) 1 2.3% 2108
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -83.3 4.4
## p_loo 13.3 1.7
## looic 166.6 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1014
## (0.5, 0.7] (ok) 1 2.3% 807
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.2
## p_loo 9.2 0.9
## looic 160.7 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.8 4.2
## p_loo 9.2 0.9
## looic 161.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.3 4.1
## p_loo 10.0 1.1
## looic 164.6 8.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1821
## (0.5, 0.7] (ok) 1 2.3% 4107
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.4 4.2
## p_loo 9.3 1.0
## looic 162.7 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1403
## (0.5, 0.7] (ok) 1 2.3% 3344
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.0 4.1
## p_loo 9.5 1.0
## looic 164.1 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1345
## (0.5, 0.7] (ok) 1 2.3% 1546
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.8 4.2
## p_loo 9.4 1.0
## looic 163.6 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1677
## (0.5, 0.7] (ok) 2 4.5% 1812
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 8.7 0.9
## looic 160.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.0 4.0
## p_loo 9.3 0.9
## looic 164.1 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -82.7 4.4
## p_loo 11.0 1.2
## looic 165.4 8.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1180
## (0.5, 0.7] (ok) 2 4.5% 4832
## (0.7, 1] (bad) 1 2.3% 408
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("work_experience_java.s"),
# New model(s)
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")),
scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")),
scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")),
scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")),
scenario_quality.with(c("workplace_peer_review", "work_experience_java.s"))
)
loo_result[2]
## $diffs
## elpd_diff
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 0.0
## scenario_quality.with("workplace_coding_standards") 0.0
## scenario_quality.with("work_experience_programming.s") -0.1
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) -0.3
## scenario_quality.with("workplace_peer_review") -0.4
## scenario_quality.with(c("workplace_peer_review", "work_experience_java.s")) -0.5
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")) -0.5
## scenario_quality.with("work_experience_java.s") -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) -0.5
## scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")) -0.6
## scenario_quality.with() -1.1
## se_diff
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 0.0
## scenario_quality.with("workplace_coding_standards") 1.3
## scenario_quality.with("work_experience_programming.s") 0.9
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) 0.4
## scenario_quality.with("workplace_peer_review") 1.7
## scenario_quality.with(c("workplace_peer_review", "work_experience_java.s")) 1.0
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s")) 0.9
## scenario_quality.with("work_experience_java.s") 1.1
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) 0.8
## scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review")) 1.3
## scenario_quality.with() 1.9
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.3 4.1
## p_loo 8.4 0.9
## looic 162.7 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1548
## (0.5, 0.7] (ok) 1 2.3% 2108
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.2
## p_loo 9.2 0.9
## looic 160.7 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 8.7 0.9
## looic 160.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.8 4.2
## p_loo 9.2 0.9
## looic 161.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.3
## p_loo 9.6 0.9
## looic 160.4 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1920
## (0.5, 0.7] (ok) 1 2.3% 4737
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.8 4.4
## p_loo 9.9 0.9
## looic 161.5 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1832
## (0.5, 0.7] (ok) 1 2.3% 3228
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 9.6 1.0
## looic 161.4 8.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.8 4.3
## p_loo 9.5 1.0
## looic 161.6 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1684
## (0.5, 0.7] (ok) 1 2.3% 1410
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.6 4.3
## p_loo 9.8 1.0
## looic 161.1 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1701
## (0.5, 0.7] (ok) 3 6.8% 2998
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_peer_review", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.4
## p_loo 9.7 1.0
## looic 161.3 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1574
## (0.5, 0.7] (ok) 1 2.3% 3923
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
scenario_quality.with(),
scenario_quality.with("work_experience_programming.s"),
scenario_quality.with("workplace_coding_standards"),
scenario_quality.with("workplace_peer_review"),
scenario_quality.with("work_experience_java.s"),
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")),
scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")),
# New model(s)
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")),
scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")),
scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")),
scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review"))
)
loo_result[2]
## $diffs
## elpd_diff
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 0.0
## scenario_quality.with("workplace_coding_standards") 0.0
## scenario_quality.with("work_experience_programming.s") -0.1
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) -0.3
## scenario_quality.with("workplace_peer_review") -0.4
## scenario_quality.with("work_experience_java.s") -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) -0.5
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")) -0.7
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")) -0.7
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review")) -0.9
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")) -1.0
## scenario_quality.with() -1.1
## se_diff
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards")) 0.0
## scenario_quality.with("workplace_coding_standards") 1.3
## scenario_quality.with("work_experience_programming.s") 0.9
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s")) 0.4
## scenario_quality.with("workplace_peer_review") 1.7
## scenario_quality.with("work_experience_java.s") 1.1
## scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review")) 0.8
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review")) 0.3
## scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s")) 0.2
## scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review")) 0.6
## scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review")) 0.8
## scenario_quality.with() 1.9
loo_result[1]
## $loos
## $loos$`scenario_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.3 4.1
## p_loo 8.4 0.9
## looic 162.7 8.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1548
## (0.5, 0.7] (ok) 1 2.3% 2108
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.3 4.2
## p_loo 9.2 0.9
## looic 160.7 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.2
## p_loo 8.7 0.9
## looic 160.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.7 4.3
## p_loo 8.9 0.9
## looic 161.3 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.8 4.2
## p_loo 9.2 0.9
## looic 161.5 8.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.2 4.3
## p_loo 9.6 0.9
## looic 160.4 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1920
## (0.5, 0.7] (ok) 1 2.3% 4737
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.8 4.4
## p_loo 9.9 0.9
## looic 161.5 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1832
## (0.5, 0.7] (ok) 1 2.3% 3228
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.6 4.3
## p_loo 9.8 1.0
## looic 161.1 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 1701
## (0.5, 0.7] (ok) 3 6.8% 2998
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.4
## p_loo 10.3 1.0
## looic 161.8 8.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1565
## (0.5, 0.7] (ok) 2 4.5% 3778
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "workplace_coding_standards", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -80.9 4.3
## p_loo 10.3 1.0
## looic 161.8 8.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 1810
## (0.5, 0.7] (ok) 2 4.5% 1245
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("work_experience_programming.s", "work_experience_java.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.2 4.4
## p_loo 10.4 1.1
## looic 162.4 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`scenario_quality.with(c("workplace_coding_standards", "work_experience_java.s", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -81.2 4.4
## p_loo 10.5 1.1
## looic 162.3 8.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 43 97.7% 1255
## (0.5, 0.7] (ok) 1 2.3% 1744
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
scenario_quality0 <- brm(
"quality_pre_task ~ 1 + high_debt_version + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality0",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality0)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.36 0.29 0.01 1.08 1.00 2157 2375
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.90 0.55 -3.03 -0.87 1.00 4722
## Intercept[2] -0.56 0.42 -1.40 0.25 1.00 6267
## Intercept[3] 0.22 0.42 -0.59 1.03 1.00 5807
## Intercept[4] 0.62 0.43 -0.24 1.48 1.00 5701
## Intercept[5] 1.91 0.49 0.96 2.89 1.00 5077
## Intercept[6] 3.94 0.72 2.62 5.50 1.00 5851
## high_debt_versionfalse 1.38 0.51 0.38 2.40 1.00 6286
## Tail_ESS
## Intercept[1] 2746
## Intercept[2] 3602
## Intercept[3] 3569
## Intercept[4] 3310
## Intercept[5] 2753
## Intercept[6] 3086
## high_debt_versionfalse 2942
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality0)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.129903125 0.4188857 -1.2299003 0.5809392
## 6033d90a5af2c702367b3a96 0.009332742 0.3950992 -0.8300987 0.8522972
## 6034fc165af2c702367b3a98 -0.011157968 0.3905769 -0.8681339 0.8054951
## 603500725af2c702367b3a99 0.270396793 0.5335423 -0.4259196 1.7889557
## 603f97625af2c702367b3a9d -0.024159053 0.3859246 -0.8598157 0.7813736
## 603fd5d95af2c702367b3a9e 0.042621048 0.4100073 -0.7859991 1.0106544
## 60409b7b5af2c702367b3a9f -0.034372972 0.4106800 -0.9725467 0.7755178
## 604b82b5a7718fbed181b336 0.217655146 0.4817231 -0.4441285 1.5300886
## 6050c1bf856f36729d2e5218 -0.111418626 0.4153685 -1.1348366 0.6217000
## 6050e1e7856f36729d2e5219 0.024062186 0.4305651 -0.8408798 1.0065099
## 6055fdc6856f36729d2e521b 0.015782909 0.4060000 -0.8626181 0.9303131
## 60589862856f36729d2e521f -0.152063085 0.4395170 -1.3093359 0.5557654
## 605afa3a856f36729d2e5222 -0.019792100 0.3886462 -0.9549447 0.7941260
## 605c8bc6856f36729d2e5223 -0.153030454 0.4664495 -1.3579791 0.6397383
## 605f3f2d856f36729d2e5224 -0.159242583 0.4432931 -1.2930023 0.5596181
## 605f46c3856f36729d2e5225 0.052246427 0.3998679 -0.7400494 0.9765701
## 60605337856f36729d2e5226 -0.079268998 0.4080155 -1.0847426 0.7019991
## 60609ae6856f36729d2e5228 0.012534538 0.4034100 -0.8579773 0.9486387
## 6061ce91856f36729d2e522e 0.010486655 0.4007665 -0.8477178 0.9114304
## 6061f106856f36729d2e5231 0.234240817 0.5071045 -0.4486812 1.6131326
## 6068ea9f856f36729d2e523e -0.056338226 0.4066959 -0.9792076 0.7726060
## 6075ab05856f36729d2e5247 0.059047385 0.3845733 -0.7120678 0.9804833
plot(scenario_quality0, ask = FALSE)
pp_check(scenario_quality0, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
We select the best performing model with one variable.
scenario_quality1 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality1",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality1)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.34 0.28 0.01 1.06 1.00 2480 2292
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.98 0.53 -3.10 -1.00 1.00
## Intercept[2] -0.55 0.42 -1.40 0.29 1.01
## Intercept[3] 0.25 0.43 -0.60 1.06 1.00
## Intercept[4] 0.65 0.43 -0.21 1.51 1.00
## Intercept[5] 1.97 0.48 1.04 2.95 1.00
## Intercept[6] 4.07 0.74 2.72 5.68 1.00
## high_debt_versionfalse 1.45 0.53 0.41 2.50 1.00
## work_experience_programming.s -0.54 0.30 -1.15 0.03 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 4561 2906
## Intercept[2] 7037 2959
## Intercept[3] 6509 3466
## Intercept[4] 6227 3494
## Intercept[5] 5972 3377
## Intercept[6] 6730 3140
## high_debt_versionfalse 7589 2913
## work_experience_programming.s 7091 2980
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality1)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.156324093 0.4228048 -1.2898998 0.4942763
## 6033d90a5af2c702367b3a96 -0.012759481 0.3944357 -0.8916087 0.8954872
## 6034fc165af2c702367b3a98 -0.046555071 0.3837114 -0.9378860 0.7432463
## 603500725af2c702367b3a99 0.229364484 0.5146324 -0.4314582 1.6567989
## 603f97625af2c702367b3a9d -0.056216128 0.3681785 -0.9269026 0.6919428
## 603fd5d95af2c702367b3a9e 0.014045577 0.4130713 -0.8307097 0.9153825
## 60409b7b5af2c702367b3a9f -0.072860414 0.3952675 -1.0628554 0.6589539
## 604b82b5a7718fbed181b336 0.193177759 0.4527226 -0.4613786 1.4454478
## 6050c1bf856f36729d2e5218 -0.082122079 0.3955144 -1.0687864 0.6205536
## 6050e1e7856f36729d2e5219 0.008848661 0.4292889 -0.9418675 0.9254945
## 6055fdc6856f36729d2e521b -0.023154826 0.3803876 -0.8903244 0.7815592
## 60589862856f36729d2e521f -0.044222350 0.4072477 -1.0242237 0.7698421
## 605afa3a856f36729d2e5222 0.073201385 0.3744533 -0.6355949 1.0288080
## 605c8bc6856f36729d2e5223 -0.129958314 0.4177075 -1.1933380 0.5445543
## 605f3f2d856f36729d2e5224 -0.011700803 0.4144657 -0.9096976 0.8629951
## 605f46c3856f36729d2e5225 0.031900177 0.3946281 -0.7760796 0.9358086
## 60605337856f36729d2e5226 -0.096617375 0.3953706 -1.1020617 0.6347367
## 60609ae6856f36729d2e5228 -0.021687185 0.3902530 -0.9279962 0.8087896
## 6061ce91856f36729d2e522e -0.020532041 0.3917740 -0.8967069 0.8195523
## 6061f106856f36729d2e5231 0.186195354 0.4575356 -0.4716529 1.4376291
## 6068ea9f856f36729d2e523e -0.060923418 0.4198880 -1.0576470 0.7425363
## 6075ab05856f36729d2e5247 0.045584175 0.3869492 -0.7113120 0.9555720
plot(scenario_quality1, ask = FALSE)
pp_check(scenario_quality1, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
We select the best performing model with two variables.
scenario_quality2 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality2",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality2)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.35 0.28 0.01 1.04 1.00 1769 1950
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.73 0.59 -2.94 -0.66 1.00
## Intercept[2] -0.30 0.47 -1.20 0.62 1.00
## Intercept[3] 0.50 0.48 -0.43 1.44 1.00
## Intercept[4] 0.91 0.49 -0.02 1.88 1.00
## Intercept[5] 2.26 0.54 1.22 3.36 1.00
## Intercept[6] 4.39 0.77 2.95 5.99 1.00
## high_debt_versionfalse 1.46 0.51 0.46 2.48 1.00
## work_experience_programming.s -0.44 0.31 -1.07 0.16 1.00
## workplace_coding_standardsfalse 0.56 0.53 -0.48 1.62 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 4925 3390
## Intercept[2] 5924 3620
## Intercept[3] 5392 2949
## Intercept[4] 5358 3352
## Intercept[5] 4406 3108
## Intercept[6] 5492 3535
## high_debt_versionfalse 6701 2956
## work_experience_programming.s 4789 2793
## workplace_coding_standardsfalse 4831 2512
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality2)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.185747243 0.4365202 -1.3625338 0.4856064
## 6033d90a5af2c702367b3a96 0.004219249 0.3926383 -0.8605150 0.8819449
## 6034fc165af2c702367b3a98 -0.068997948 0.3926284 -1.0393832 0.6888253
## 603500725af2c702367b3a99 0.220885581 0.5082568 -0.4754369 1.6086947
## 603f97625af2c702367b3a9d -0.021591877 0.3820250 -0.8993056 0.7584072
## 603fd5d95af2c702367b3a9e 0.006671980 0.4176682 -0.8457328 0.9516715
## 60409b7b5af2c702367b3a9f -0.091684204 0.3871800 -1.0338906 0.6413740
## 604b82b5a7718fbed181b336 0.179039062 0.4618139 -0.5307246 1.4183830
## 6050c1bf856f36729d2e5218 -0.066804230 0.3945816 -1.0389064 0.6825417
## 6050e1e7856f36729d2e5219 -0.003540173 0.4150383 -0.9453250 0.8973239
## 6055fdc6856f36729d2e521b 0.019478872 0.3890875 -0.8141966 0.8991186
## 60589862856f36729d2e521f -0.030413659 0.4033890 -0.9310910 0.8388175
## 605afa3a856f36729d2e5222 0.094563284 0.4019942 -0.6453876 1.1210384
## 605c8bc6856f36729d2e5223 -0.107294429 0.4280647 -1.1843492 0.6373083
## 605f3f2d856f36729d2e5224 0.001413917 0.4277264 -0.9191924 0.8851699
## 605f46c3856f36729d2e5225 0.077048402 0.3874171 -0.6734111 1.0115066
## 60605337856f36729d2e5226 -0.064654146 0.3961882 -1.0375038 0.7130141
## 60609ae6856f36729d2e5228 -0.039800926 0.3721491 -0.8923937 0.7130692
## 6061ce91856f36729d2e522e 0.013653238 0.3956862 -0.8068239 0.9047648
## 6061f106856f36729d2e5231 0.162193709 0.4452366 -0.5149623 1.3327140
## 6068ea9f856f36729d2e523e -0.071444015 0.4027677 -1.0328201 0.7013173
## 6075ab05856f36729d2e5247 0.019111441 0.3850891 -0.8156413 0.9314004
plot(scenario_quality2, ask = FALSE)
pp_check(scenario_quality2, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
We select the best performing model with three variables.
scenario_quality3 <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + work_experience_java.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/scenario_quality3",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality3)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + workplace_coding_standards + work_experience_java.s + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.35 0.28 0.01 1.02 1.00 2232 1866
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.75 0.60 -3.00 -0.64 1.00
## Intercept[2] -0.31 0.49 -1.29 0.62 1.00
## Intercept[3] 0.50 0.49 -0.46 1.46 1.00
## Intercept[4] 0.90 0.51 -0.09 1.89 1.00
## Intercept[5] 2.25 0.57 1.17 3.40 1.00
## Intercept[6] 4.39 0.80 2.92 6.09 1.00
## high_debt_versionfalse 1.46 0.50 0.47 2.45 1.00
## work_experience_programming.s -0.38 0.63 -1.62 0.86 1.00
## workplace_coding_standardsfalse 0.55 0.55 -0.51 1.65 1.00
## work_experience_java.s -0.09 0.62 -1.29 1.16 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5223 3396
## Intercept[2] 7157 3799
## Intercept[3] 6025 3323
## Intercept[4] 5696 3314
## Intercept[5] 5098 2636
## Intercept[6] 5784 2883
## high_debt_versionfalse 5117 2728
## work_experience_programming.s 3161 2898
## workplace_coding_standardsfalse 6725 2695
## work_experience_java.s 3395 2866
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality3)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.183968515 0.4424785 -1.3355809 0.4932503
## 6033d90a5af2c702367b3a96 0.006153712 0.3817561 -0.8123480 0.8944321
## 6034fc165af2c702367b3a98 -0.069662603 0.3812510 -0.9480196 0.7061861
## 603500725af2c702367b3a99 0.212597291 0.5057904 -0.4791792 1.6075705
## 603f97625af2c702367b3a9d -0.029453586 0.3944049 -0.9498690 0.7747838
## 603fd5d95af2c702367b3a9e 0.005341444 0.3903356 -0.8431430 0.9020765
## 60409b7b5af2c702367b3a9f -0.104888010 0.4023436 -1.1248769 0.5933679
## 604b82b5a7718fbed181b336 0.181406366 0.4516697 -0.5215662 1.3454235
## 6050c1bf856f36729d2e5218 -0.060531758 0.3998653 -1.0009524 0.7228178
## 6050e1e7856f36729d2e5219 0.001218971 0.3970731 -0.8812060 0.8599051
## 6055fdc6856f36729d2e521b 0.003557873 0.3868659 -0.8264013 0.8420175
## 60589862856f36729d2e521f -0.029938970 0.4183108 -1.0651721 0.8023106
## 605afa3a856f36729d2e5222 0.099103740 0.3948460 -0.5896966 1.0703840
## 605c8bc6856f36729d2e5223 -0.107485145 0.4162012 -1.1875757 0.6199258
## 605f3f2d856f36729d2e5224 -0.002514301 0.4288634 -0.9433927 0.9410924
## 605f46c3856f36729d2e5225 0.058492104 0.4022862 -0.7093513 1.0085691
## 60605337856f36729d2e5226 -0.068200670 0.3901694 -0.9970055 0.6752643
## 60609ae6856f36729d2e5228 -0.037222949 0.3923003 -0.9739810 0.7488668
## 6061ce91856f36729d2e522e 0.011729049 0.3972338 -0.8238957 0.8633298
## 6061f106856f36729d2e5231 0.168208896 0.4488761 -0.5164507 1.3794813
## 6068ea9f856f36729d2e523e -0.077187951 0.4161270 -1.1091213 0.6391310
## 6075ab05856f36729d2e5247 0.015634150 0.3905241 -0.7791451 0.8986069
plot(scenario_quality3, ask = FALSE)
pp_check(scenario_quality3, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
All candidate models look nice, none is significantly better than the
others, we will proceed the model containing work experince as it
otherwise ourd be added in the next step:
scenario_quality1
Some participants did only complete one scenario. Those has been excluded from the initial dataset to improve sampling of the models. We do however want to use all data we can and will therefore try to fit the model with the complete dataset.
scenario_quality1.all <- brm(
"quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 2), class = "Intercept"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.completed),
file = "fits/scenario_quality1.all",
file_refit = "on_change",
seed = 20210421
)
summary(scenario_quality1.all)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_pre_task ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.33 0.26 0.01 0.98 1.00 2031 2303
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -2.07 0.53 -3.23 -1.12 1.00
## Intercept[2] -0.59 0.40 -1.38 0.16 1.00
## Intercept[3] 0.29 0.39 -0.48 1.05 1.00
## Intercept[4] 0.80 0.40 0.03 1.60 1.00
## Intercept[5] 2.07 0.45 1.21 2.97 1.00
## Intercept[6] 4.20 0.72 2.87 5.73 1.00
## high_debt_versionfalse 1.41 0.46 0.53 2.32 1.00
## work_experience_programming.s -0.53 0.28 -1.09 -0.01 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5364 3202
## Intercept[2] 7002 3150
## Intercept[3] 5686 3185
## Intercept[4] 5096 2788
## Intercept[5] 4930 3176
## Intercept[6] 5691 3121
## high_debt_versionfalse 6402 3014
## work_experience_programming.s 6128 2673
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(scenario_quality1.all)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -0.036344018 0.3778096 -0.9443224 0.7505782
## 6033d69a5af2c702367b3a95 -0.143880160 0.4067284 -1.2252806 0.5083088
## 6033d90a5af2c702367b3a96 -0.012651518 0.3629096 -0.8817472 0.7549010
## 6034fc165af2c702367b3a98 -0.026550109 0.3595660 -0.8477204 0.7387046
## 603500725af2c702367b3a99 0.216037415 0.4604108 -0.4222454 1.4852549
## 603f84f15af2c702367b3a9b -0.003780082 0.3803829 -0.8326188 0.8278322
## 603f97625af2c702367b3a9d -0.040929735 0.3666652 -0.8933498 0.7011871
## 603fd5d95af2c702367b3a9e 0.014026924 0.3845764 -0.8030918 0.8580742
## 60409b7b5af2c702367b3a9f -0.044290239 0.3568668 -0.8463546 0.6859541
## 604b82b5a7718fbed181b336 0.199405266 0.4540252 -0.4478257 1.4098924
## 604f1239a7718fbed181b33f 0.021776699 0.3898698 -0.7922127 0.9064980
## 6050c1bf856f36729d2e5218 -0.070180778 0.3807548 -0.9952862 0.6526994
## 6050e1e7856f36729d2e5219 0.006824639 0.4221551 -0.9162308 0.9179286
## 6055fdc6856f36729d2e521b -0.011133238 0.3782325 -0.8731314 0.7928925
## 60579f2a856f36729d2e521e -0.150126094 0.4693970 -1.4194407 0.6014127
## 60589862856f36729d2e521f -0.029848559 0.4007633 -0.9316652 0.7944116
## 605a30a7856f36729d2e5221 0.091605269 0.4086198 -0.6576940 1.0914805
## 605afa3a856f36729d2e5222 0.098228662 0.4069549 -0.6159270 1.0997090
## 605c8bc6856f36729d2e5223 -0.104956218 0.3984449 -1.0980014 0.6093432
## 605f3f2d856f36729d2e5224 0.016485113 0.4004776 -0.8215100 0.8891029
## 605f46c3856f36729d2e5225 0.044105277 0.3635976 -0.7131632 0.8564480
## 60605337856f36729d2e5226 -0.090580836 0.3784690 -1.0864748 0.5999590
## 60609ae6856f36729d2e5228 -0.021462566 0.3819059 -0.8899594 0.8060843
## 6061ce91856f36729d2e522e -0.016937451 0.3701515 -0.8359979 0.7785221
## 6061f106856f36729d2e5231 0.184549114 0.4486086 -0.4788589 1.4117604
## 60672faa856f36729d2e523c -0.061977456 0.3994896 -1.0018200 0.7256522
## 6068ea9f856f36729d2e523e -0.041880481 0.3881958 -0.9686586 0.7371543
## 606db69d856f36729d2e5243 -0.046806836 0.3703962 -0.9090831 0.6711855
## 6075ab05856f36729d2e5247 0.061664861 0.3751948 -0.6628332 1.0107697
plot(scenario_quality1.all, ask = FALSE)
pp_check(scenario_quality1.all, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
This means that our final model, with all data points and experience
predictors, is scenario_quality1.all
To begin interpreting the model we look at how it’s parameters were estimated. As our research is focused on how the outcome of the model is effected we will mainly analyze the \(\beta\) parameters.
mcmc_areas(scenario_quality1.all, pars = c("b_high_debt_versionfalse", "b_work_experience_programming.s"), prob = 0.95) + scale_y_discrete() +
scale_y_discrete(labels=c("High debt version: false", "Professional programming experience")) +
ggtitle("Beta parameters densities in scenario quality model", subtitle = "Shaded region marks 95% of the density. Line marks the median")
scale_programming_experience <- function(x) {
(x - mean(d.completed$work_experience_programming))/ sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}
post_settings <- expand.grid(
high_debt_version = c("false", "true"),
session = NA,
work_experience_programming.s = sapply(c(0, 3, 10, 25, 40), scale_programming_experience)
)
post <- posterior_predict(scenario_quality1.all, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
left_join(
rowid_to_column(post_settings, var= "settings_id"),
by = "settings_id"
) %>%
mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
select(
estimate,
high_debt_version,
work_experience_programming
)%>%
mutate(estimate = estimate)
post.nice <- post %>% mutate_at("estimate", function(x) revalue(as.ordered(x), c(
"1"="Very Bad",
"2"="Bad",
"3"="Somewhat Bad",
"4"="Neutral",
"5"="Somewhat Good",
"6"="Good",
"7"="Very Good"
)))
post.nice$high_debt_version <- revalue(post.nice$high_debt_version, c(
"true"="High Debt",
"false"="Low Debt"
))
post.nice.3 <- filter(post.nice, work_experience_programming == 3)
vline.data.3 <- post.nice.3 %>%
group_by(high_debt_version) %>%
summarize(z = mean(as.numeric(estimate)))
sprintf("Estimations for 3 years experience")
## [1] "Estimations for 3 years experience"
post.nice.3 %>%
ggplot() +
geom_histogram(aes(x=estimate),fill= "Light Blue", stat = "count") +
geom_vline(aes(xintercept = z),
vline.data.3,
col = "Dark Blue",
lwd = 1)+
facet_grid(rows = vars(high_debt_version)) +
scale_y_continuous(limits = NULL, breaks = sapply(c(0.1, 0.2, 0.3, 0.4), function(x) x*nrow(post.nice.3) / 2), labels = c("10%","20%","30%","40%")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
post.nice.25 <- filter(post.nice, work_experience_programming == 25)
vline.data.25 <- post.nice.25 %>%
group_by(high_debt_version) %>%
summarize(z = mean(as.numeric(estimate)))
sprintf("Estimations for 25 years experience")
## [1] "Estimations for 25 years experience"
post.nice.25 %>%
ggplot() +
geom_histogram(aes(x=estimate),fill= "Light Blue", stat = "count") +
geom_vline(aes(xintercept = z),
vline.data.25,
col = "Dark Blue",
lwd = 1)+
facet_grid(rows = vars(high_debt_version)) +
scale_y_continuous(limits = NULL, breaks = sapply(c(0.1, 0.2, 0.3, 0.4), function(x) x*nrow(post.nice.25) / 2), labels = c("10%","20%","30%","40%")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
post.diff <- post %>% filter(high_debt_version == "true")
post.diff$estimate = post.diff$estimate - filter(post, high_debt_version == "false")$estimate
post.diff %>%
ggplot(aes(x=estimate)) +
geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
facet_grid(rows = vars(work_experience_programming)) +
labs(
title = "Scenario rating diff / years of programming experience",
subtitle = "Difference as: high debt rating - low debt rating",
x = "Rating difference"
) +
scale_y_continuous(breaks = NULL)
We can then proceed to calculate some likelihoods:
post.diff.10 <- post.diff %>% filter(work_experience_programming == 10)
high_debt_rated_higher <- sum(post.diff.10$estimate > 0)
low_debt_rated_higher <- sum(post.diff.10$estimate < 0)
x <- low_debt_rated_higher / high_debt_rated_higher
x
## [1] 3.136139
Participants with 10 years of professional programming experience were 214% more likely to rate the high debt version scenario as worse than then they were to rate the low debt version scenario as worse.