We plot the data and can see that there is no obvious difference between the debt versions.
d.both_completed %>%
mutate_at("high_debt_version",
function(x) case_when(
x == "false" ~ "Low debt",
x == "true" ~ "High debt"
)) %>%
ggplot(aes(high_debt_version)) +
geom_bar(aes(fill = hashcode.exists), position = position_fill(reverse = TRUE)) +
scale_fill_manual("Legend", values = c("darkblue", "lightblue"), labels = c("Implemented", "Not implemented"), guide = guide_legend(reverse = TRUE)) +
labs(title = "Hashcode Implementation") +
xlab("Debt version") +
ylab("Ratio of implementation")
For a boolean outcome, bernoulli is the most suitable family.
We include high_debt_verison
as a predictor in our model as this variable represent the very effect we want to measure. We also include a varying intercept for each individual to prevent the model from learning too much from single participants with extreme measurements.
Since they may correlate, hashcode inclusion and equals inclusion are both included in a single multivariate model.
We iterate over the model until we have sane priors, in this case a prior giving a 50/50 chance was chosen in both cases.
The prior lkj(2)
will mean the model is skeptical of strong correlations.
utility.with <- extendable_model(
base_name = "utility",
base_formula = "mvbind(hashcode.exists, equals.exists) ~ 1 + high_debt_version + (1 | c | session)",
base_priors = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 0.5), class = "Intercept"),
prior(exponential(1), class = "sd", resp = "equalsexists"),
prior(exponential(1), class = "sd", resp = "hashcodeexists"),
prior(lkj(2), class = "L")
),
family = bernoulli(),
data = d.both_completed,
)
prior_summary(utility.with(only_priors= TRUE))
## Setting 'rescor' to FALSE by default for this model
prior_summary(utility.with(sample_prior = "only"))
## Setting 'rescor' to FALSE by default for this model
pp_check(utility.with(sample_prior = "only"), type = "bars", nsamples = 200, resp = "equalsexists")
pp_check(utility.with(sample_prior = "only"), type = "bars", nsamples = 200, resp = "hashcodeexists")
We choose a beta parameter priors allowing for the beta parameter to account for 100% of the effect but that is skeptical to such strong effects from the beta parameter.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 0, 0.5)
sim.beta <- rnorm(sim.size, 0, 1)
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100
data.frame(x = sim.beta.diff) %>%
ggplot(aes(x)) +
geom_density() +
xlim(-150, 150) +
labs(
title = "Beta parameter prior influence",
x = "Estimate with beta as % of estimate without beta",
y = "Density"
)
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
pp_check(utility.with(), type = "bars", nsamples = 200, resp = "equalsexists")
pp_check(utility.with(), type = "bars", nsamples = 200, resp = "hashcodeexists")
summary(utility.with())
## Setting 'rescor' to FALSE by default for this model
## Family: MV(bernoulli, bernoulli)
## Links: mu = logit
## mu = logit
## Formula: hashcode.exists ~ 1 + high_debt_version + (1 | c | session)
## equals.exists ~ 1 + high_debt_version + (1 | c | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error
## sd(hashcodeexists_Intercept) 1.64 0.73
## sd(equalsexists_Intercept) 1.89 0.80
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.74 0.24
## l-95% CI u-95% CI Rhat
## sd(hashcodeexists_Intercept) 0.27 3.31 1.00
## sd(equalsexists_Intercept) 0.50 3.71 1.00
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.10 0.98 1.00
## Bulk_ESS Tail_ESS
## sd(hashcodeexists_Intercept) 1161 705
## sd(equalsexists_Intercept) 1221 980
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 1328 861
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## hashcodeexists_Intercept -0.21 0.46 -1.14 0.73 1.00
## equalsexists_Intercept -0.02 0.47 -0.94 0.89 1.00
## hashcodeexists_high_debt_versionfalse -0.18 0.60 -1.38 0.98 1.00
## equalsexists_high_debt_versionfalse -0.37 0.60 -1.54 0.80 1.00
## Bulk_ESS Tail_ESS
## hashcodeexists_Intercept 6406 3246
## equalsexists_Intercept 5653 2948
## hashcodeexists_high_debt_versionfalse 7742 2702
## equalsexists_high_debt_versionfalse 7724 3032
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(utility.with(), ask = FALSE)
# default prior for monotonic predictor
edlvl_prior <- c(
prior(dirichlet(2), class = "simo", coef = "moeducation_level1", resp = "equalsexists"),
prior(dirichlet(2), class = "simo", coef = "moeducation_level1", resp = "hashcodeexists")
)
We use loo
to check some possible extensions on the model.
loo_result <- loo(
# Benchmark model(s)
utility.with(),
# New model(s)
utility.with("work_domain"),
utility.with("work_experience_programming.s"),
utility.with("work_experience_java.s"),
utility.with("education_field"),
utility.with("mo(education_level)", edlvl_prior),
utility.with("workplace_peer_review"),
utility.with("workplace_td_tracking"),
utility.with("workplace_pair_programming"),
utility.with("workplace_coding_standards"),
utility.with("scenario"),
utility.with("group")
)
loo_result[2]
## $diffs
## elpd_diff se_diff
## utility.with("scenario") 0.0 0.0
## utility.with() -2.8 2.4
## utility.with("workplace_pair_programming") -2.9 2.9
## utility.with("workplace_td_tracking") -2.9 2.6
## utility.with("work_experience_java.s") -3.0 3.1
## utility.with("group") -3.1 2.6
## utility.with("workplace_coding_standards") -3.6 2.6
## utility.with("work_experience_programming.s") -3.7 3.3
## utility.with("mo(education_level)", edlvl_prior) -3.7 2.8
## utility.with("education_field") -3.7 2.5
## utility.with("workplace_peer_review") -3.9 2.6
## utility.with("work_domain") -5.6 2.9
loo_result[1]
## $loos
## $loos$`utility.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.7
## p_loo 24.5 2.9
## looic 116.7 11.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 934
## (0.5, 0.7] (ok) 15 34.1% 420
## (0.7, 1] (bad) 7 15.9% 107
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.1 6.4
## p_loo 28.0 3.4
## looic 122.3 12.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 615
## (0.5, 0.7] (ok) 14 31.8% 236
## (0.7, 1] (bad) 9 20.5% 94
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.2 6.3
## p_loo 25.1 3.3
## looic 118.4 12.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 614
## (0.5, 0.7] (ok) 10 22.7% 277
## (0.7, 1] (bad) 12 27.3% 43
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.6 5.4
## p_loo 22.8 2.5
## looic 117.1 10.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 23 52.3% 696
## (0.5, 0.7] (ok) 13 29.5% 256
## (0.7, 1] (bad) 8 18.2% 141
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.2 5.7
## p_loo 24.8 2.9
## looic 118.5 11.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 24 54.5% 605
## (0.5, 0.7] (ok) 10 22.7% 164
## (0.7, 1] (bad) 9 20.5% 85
## (1, Inf) (very bad) 1 2.3% 68
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.2 6.2
## p_loo 26.6 3.3
## looic 118.5 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 19 43.2% 1263
## (0.5, 0.7] (ok) 16 36.4% 261
## (0.7, 1] (bad) 8 18.2% 118
## (1, Inf) (very bad) 1 2.3% 37
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.5 6.2
## p_loo 26.2 3.3
## looic 118.9 12.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 23 52.3% 1284
## (0.5, 0.7] (ok) 11 25.0% 288
## (0.7, 1] (bad) 9 20.5% 49
## (1, Inf) (very bad) 1 2.3% 49
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.9
## p_loo 25.2 3.0
## looic 116.8 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 475
## (0.5, 0.7] (ok) 14 31.8% 232
## (0.7, 1] (bad) 8 18.2% 90
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.4
## p_loo 22.7 2.5
## looic 116.8 10.7
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 24 54.5% 598
## (0.5, 0.7] (ok) 12 27.3% 328
## (0.7, 1] (bad) 8 18.2% 96
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.1 5.9
## p_loo 25.3 3.0
## looic 118.3 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 1027
## (0.5, 0.7] (ok) 11 25.0% 312
## (0.7, 1] (bad) 10 22.7% 72
## (1, Inf) (very bad) 1 2.3% 43
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -55.6 6.0
## p_loo 25.0 3.1
## looic 111.1 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 343
## (0.5, 0.7] (ok) 15 34.1% 134
## (0.7, 1] (bad) 8 18.2% 111
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.6 5.8
## p_loo 25.0 2.9
## looic 117.2 11.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 803
## (0.5, 0.7] (ok) 15 34.1% 271
## (0.7, 1] (bad) 7 15.9% 136
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
utility.with(),
utility.with("scenario"),
utility.with("workplace_td_tracking"),
utility.with("workplace_pair_programming"),
utility.with("work_experience_java.s"),
# New model(s)
utility.with(c("scenario", "workplace_td_tracking")),
utility.with(c("scenario", "workplace_pair_programming")),
utility.with(c("scenario", "work_experience_java.s")),
utility.with(c("workplace_td_tracking", "workplace_pair_programming")),
utility.with(c("workplace_td_tracking", "work_experience_java.s")),
utility.with(c("workplace_pair_programming", "work_experience_java.s"))
)
loo_result[2]
## $diffs
## elpd_diff
## utility.with("scenario") 0.0
## utility.with(c("scenario", "work_experience_java.s")) -0.7
## utility.with(c("scenario", "workplace_pair_programming")) -0.8
## utility.with(c("scenario", "workplace_td_tracking")) -1.8
## utility.with(c("workplace_pair_programming", "work_experience_java.s")) -2.0
## utility.with(c("workplace_td_tracking", "work_experience_java.s")) -2.4
## utility.with() -2.8
## utility.with(c("workplace_td_tracking", "workplace_pair_programming")) -2.8
## utility.with("workplace_pair_programming") -2.9
## utility.with("workplace_td_tracking") -2.9
## utility.with("work_experience_java.s") -3.0
## se_diff
## utility.with("scenario") 0.0
## utility.with(c("scenario", "work_experience_java.s")) 1.6
## utility.with(c("scenario", "workplace_pair_programming")) 1.3
## utility.with(c("scenario", "workplace_td_tracking")) 0.7
## utility.with(c("workplace_pair_programming", "work_experience_java.s")) 3.5
## utility.with(c("workplace_td_tracking", "work_experience_java.s")) 3.2
## utility.with() 2.4
## utility.with(c("workplace_td_tracking", "workplace_pair_programming")) 2.9
## utility.with("workplace_pair_programming") 2.9
## utility.with("workplace_td_tracking") 2.6
## utility.with("work_experience_java.s") 3.1
loo_result[1]
## $loos
## $loos$`utility.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.7
## p_loo 24.5 2.9
## looic 116.7 11.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 934
## (0.5, 0.7] (ok) 15 34.1% 420
## (0.7, 1] (bad) 7 15.9% 107
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -55.6 6.0
## p_loo 25.0 3.1
## looic 111.1 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 343
## (0.5, 0.7] (ok) 15 34.1% 134
## (0.7, 1] (bad) 8 18.2% 111
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.9
## p_loo 25.2 3.0
## looic 116.8 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 475
## (0.5, 0.7] (ok) 14 31.8% 232
## (0.7, 1] (bad) 8 18.2% 90
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.4
## p_loo 22.7 2.5
## looic 116.8 10.7
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 24 54.5% 598
## (0.5, 0.7] (ok) 12 27.3% 328
## (0.7, 1] (bad) 8 18.2% 96
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.6 5.4
## p_loo 22.8 2.5
## looic 117.1 10.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 23 52.3% 696
## (0.5, 0.7] (ok) 13 29.5% 256
## (0.7, 1] (bad) 8 18.2% 141
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("scenario", "workplace_td_tracking"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -57.4 6.3
## p_loo 27.1 3.5
## looic 114.7 12.7
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 19 43.2% 636
## (0.5, 0.7] (ok) 16 36.4% 281
## (0.7, 1] (bad) 9 20.5% 53
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("scenario", "workplace_pair_programming"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -56.3 5.9
## p_loo 24.3 3.1
## looic 112.6 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 612
## (0.5, 0.7] (ok) 16 36.4% 247
## (0.7, 1] (bad) 6 13.6% 151
## (1, Inf) (very bad) 1 2.3% 27
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("scenario", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -56.3 5.9
## p_loo 24.3 3.0
## looic 112.5 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 713
## (0.5, 0.7] (ok) 16 36.4% 146
## (0.7, 1] (bad) 6 13.6% 66
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("workplace_td_tracking", "workplace_pair_programming"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.6
## p_loo 23.6 2.7
## looic 116.8 11.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 25 56.8% 655
## (0.5, 0.7] (ok) 12 27.3% 269
## (0.7, 1] (bad) 7 15.9% 115
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("workplace_td_tracking", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -57.9 5.1
## p_loo 21.0 2.3
## looic 115.8 10.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 25 56.8% 682
## (0.5, 0.7] (ok) 14 31.8% 191
## (0.7, 1] (bad) 5 11.4% 88
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("workplace_pair_programming", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -57.5 5.4
## p_loo 21.0 2.4
## looic 115.1 10.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 28 63.6% 463
## (0.5, 0.7] (ok) 11 25.0% 208
## (0.7, 1] (bad) 5 11.4% 135
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
loo_result <- loo(
# Benchmark model(s)
utility.with(),
utility.with("scenario"),
utility.with("workplace_td_tracking"),
utility.with("workplace_pair_programming"),
utility.with("work_experience_java.s"),
utility.with(c("scenario", "workplace_pair_programming")),
utility.with(c("scenario", "work_experience_java.s")),
# New model(s)
utility.with(c("scenario", "workplace_pair_programming", "work_experience_java.s"))
)
loo_result[2]
## $diffs
## elpd_diff
## utility.with("scenario") 0.0
## utility.with(c("scenario", "workplace_pair_programming", "work_experience_java.s")) -0.5
## utility.with(c("scenario", "work_experience_java.s")) -0.7
## utility.with(c("scenario", "workplace_pair_programming")) -0.8
## utility.with() -2.8
## utility.with("workplace_pair_programming") -2.9
## utility.with("workplace_td_tracking") -2.9
## utility.with("work_experience_java.s") -3.0
## se_diff
## utility.with("scenario") 0.0
## utility.with(c("scenario", "workplace_pair_programming", "work_experience_java.s")) 2.3
## utility.with(c("scenario", "work_experience_java.s")) 1.6
## utility.with(c("scenario", "workplace_pair_programming")) 1.3
## utility.with() 2.4
## utility.with("workplace_pair_programming") 2.9
## utility.with("workplace_td_tracking") 2.6
## utility.with("work_experience_java.s") 3.1
loo_result[1]
## $loos
## $loos$`utility.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.7
## p_loo 24.5 2.9
## looic 116.7 11.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 934
## (0.5, 0.7] (ok) 15 34.1% 420
## (0.7, 1] (bad) 7 15.9% 107
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -55.6 6.0
## p_loo 25.0 3.1
## looic 111.1 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 343
## (0.5, 0.7] (ok) 15 34.1% 134
## (0.7, 1] (bad) 8 18.2% 111
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.9
## p_loo 25.2 3.0
## looic 116.8 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 475
## (0.5, 0.7] (ok) 14 31.8% 232
## (0.7, 1] (bad) 8 18.2% 90
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.4 5.4
## p_loo 22.7 2.5
## looic 116.8 10.7
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 24 54.5% 598
## (0.5, 0.7] (ok) 12 27.3% 328
## (0.7, 1] (bad) 8 18.2% 96
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -58.6 5.4
## p_loo 22.8 2.5
## looic 117.1 10.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 23 52.3% 696
## (0.5, 0.7] (ok) 13 29.5% 256
## (0.7, 1] (bad) 8 18.2% 141
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("scenario", "workplace_pair_programming"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -56.3 5.9
## p_loo 24.3 3.1
## looic 112.6 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 47.7% 612
## (0.5, 0.7] (ok) 16 36.4% 247
## (0.7, 1] (bad) 6 13.6% 151
## (1, Inf) (very bad) 1 2.3% 27
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("scenario", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -56.3 5.9
## p_loo 24.3 3.0
## looic 112.5 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 22 50.0% 713
## (0.5, 0.7] (ok) 16 36.4% 146
## (0.7, 1] (bad) 6 13.6% 66
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`utility.with(c("scenario", "workplace_pair_programming", "work_experience_java.s"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -56.1 5.7
## p_loo 22.8 2.8
## looic 112.2 11.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 23 52.3% 341
## (0.5, 0.7] (ok) 15 34.1% 178
## (0.7, 1] (bad) 6 13.6% 53
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
utility0 <- brm(
"mvbind(hashcode.exists, equals.exists) ~ 1 + high_debt_version + (1 | c | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 0.5), class = "Intercept"),
prior(exponential(1), class = "sd", resp = "equalsexists"),
prior(exponential(1), class = "sd", resp = "hashcodeexists"),
prior(lkj(2), class = "L")
),
family = bernoulli(),
data = as.data.frame(d.both_completed),
file = "fits/utility0",
file_refit = "on_change",
seed = 20210421
)
summary(utility0)
## Family: MV(bernoulli, bernoulli)
## Links: mu = logit
## mu = logit
## Formula: hashcode.exists ~ 1 + high_debt_version + (1 | c | session)
## equals.exists ~ 1 + high_debt_version + (1 | c | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error
## sd(hashcodeexists_Intercept) 1.64 0.73
## sd(equalsexists_Intercept) 1.89 0.80
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.74 0.24
## l-95% CI u-95% CI Rhat
## sd(hashcodeexists_Intercept) 0.27 3.31 1.00
## sd(equalsexists_Intercept) 0.50 3.71 1.00
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.10 0.98 1.00
## Bulk_ESS Tail_ESS
## sd(hashcodeexists_Intercept) 1161 705
## sd(equalsexists_Intercept) 1221 980
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 1328 861
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## hashcodeexists_Intercept -0.21 0.46 -1.14 0.73 1.00
## equalsexists_Intercept -0.02 0.47 -0.94 0.89 1.00
## hashcodeexists_high_debt_versionfalse -0.18 0.60 -1.38 0.98 1.00
## equalsexists_high_debt_versionfalse -0.37 0.60 -1.54 0.80 1.00
## Bulk_ESS Tail_ESS
## hashcodeexists_Intercept 6406 3246
## equalsexists_Intercept 5653 2948
## hashcodeexists_high_debt_versionfalse 7742 2702
## equalsexists_high_debt_versionfalse 7724 3032
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(utility0)
## $session
## , , hashcodeexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.1615195 0.9968789 -1.8006700 2.2058745
## 6033d90a5af2c702367b3a96 0.1512534 1.0154822 -1.9181960 2.2633215
## 6034fc165af2c702367b3a98 0.1555363 1.0196348 -1.8371745 2.2742235
## 603500725af2c702367b3a99 -1.6830844 1.5055523 -5.3283397 0.4924695
## 603f97625af2c702367b3a9d -1.6476580 1.5150320 -5.3237975 0.5487321
## 603fd5d95af2c702367b3a9e 0.1552801 1.0278094 -1.9393212 2.1537950
## 60409b7b5af2c702367b3a9f 0.1609842 1.0222248 -1.8467330 2.2358775
## 604b82b5a7718fbed181b336 1.9272734 1.5344801 -0.2630419 5.6623748
## 6050c1bf856f36729d2e5218 -1.6680866 1.5110558 -5.2837042 0.5283383
## 6050e1e7856f36729d2e5219 0.1484481 0.9738683 -1.7943890 2.1523138
## 6055fdc6856f36729d2e521b 1.9012628 1.5199517 -0.2368000 5.4957390
## 60589862856f36729d2e521f 0.1481716 1.0105535 -1.8765215 2.1646453
## 605afa3a856f36729d2e5222 -1.6486603 1.4774384 -5.2922800 0.4849446
## 605c8bc6856f36729d2e5223 -1.6588600 1.4897712 -5.2223655 0.5246130
## 605f3f2d856f36729d2e5224 -1.6703548 1.5528200 -5.5336250 0.5875655
## 605f46c3856f36729d2e5225 1.9035082 1.4577437 -0.2625947 5.3155635
## 60605337856f36729d2e5226 -1.6464429 1.5054417 -5.0410103 0.5459434
## 60609ae6856f36729d2e5228 -1.6470511 1.4812563 -5.1416023 0.4979858
## 6061ce91856f36729d2e522e 0.1550445 0.9788262 -1.8440070 2.1637685
## 6061f106856f36729d2e5231 0.7025520 1.0580009 -1.2798235 2.9332057
## 6068ea9f856f36729d2e523e -1.6233035 1.4648741 -5.1444320 0.5477608
## 6075ab05856f36729d2e5247 1.9200786 1.4591593 -0.1900669 5.3570048
##
## , , equalsexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.1684182 1.128443 -2.1580765 2.4645033
## 6033d90a5af2c702367b3a96 0.1448039 1.103152 -1.9902175 2.3478185
## 6034fc165af2c702367b3a98 0.1420880 1.116028 -2.0952990 2.3974653
## 603500725af2c702367b3a99 -1.9484547 1.680425 -6.0477305 0.5804802
## 603f97625af2c702367b3a9d -1.9346401 1.698377 -6.2767130 0.5887695
## 603fd5d95af2c702367b3a9e 0.1645387 1.138741 -2.1868562 2.4444818
## 60409b7b5af2c702367b3a9f 0.1361878 1.081690 -1.9294800 2.2817895
## 604b82b5a7718fbed181b336 2.2050758 1.687960 -0.2956444 6.3624643
## 6050c1bf856f36729d2e5218 -1.9354763 1.655356 -5.9232450 0.5043664
## 6050e1e7856f36729d2e5219 0.1450461 1.085839 -2.0513083 2.3607495
## 6055fdc6856f36729d2e521b 2.1738827 1.622122 -0.1938018 6.2410903
## 60589862856f36729d2e521f 0.1441772 1.109126 -2.0754220 2.4272118
## 605afa3a856f36729d2e5222 -1.9256172 1.660404 -5.8154382 0.5671407
## 605c8bc6856f36729d2e5223 -1.9124173 1.600847 -5.6931613 0.4725867
## 605f3f2d856f36729d2e5224 -1.9375301 1.707383 -6.0666655 0.6064815
## 605f46c3856f36729d2e5225 2.1641563 1.651076 -0.2579165 6.1657350
## 60605337856f36729d2e5226 -1.9256093 1.703642 -6.0996960 0.5313132
## 60609ae6856f36729d2e5228 -1.8896771 1.623623 -5.7311717 0.5890986
## 6061ce91856f36729d2e522e 0.1216251 1.077869 -2.0395287 2.2792523
## 6061f106856f36729d2e5231 1.4139214 1.388622 -0.7840861 4.5940370
## 6068ea9f856f36729d2e523e -1.9144119 1.683011 -5.9638200 0.5565185
## 6075ab05856f36729d2e5247 2.1989480 1.650557 -0.2653494 6.2432115
plot(utility0, ask = FALSE)
pp_check(utility0, nsamples = 200, type = "bars", resp = "equalsexists")
pp_check(utility0, nsamples = 200, type = "bars", resp = "hashcodeexists")
We select the best performing model with one variable.
utility1 <- brm(
"mvbind(hashcode.exists, equals.exists) ~ 1 + high_debt_version + scenario + (1 | c | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 0.5), class = "Intercept"),
prior(exponential(1), class = "sd", resp = "equalsexists"),
prior(exponential(1), class = "sd", resp = "hashcodeexists"),
prior(lkj(2), class = "L")
),
family = bernoulli(),
data = as.data.frame(d.both_completed),
file = "fits/utility1",
file_refit = "on_change",
seed = 20210421
)
summary(utility1)
## Family: MV(bernoulli, bernoulli)
## Links: mu = logit
## mu = logit
## Formula: hashcode.exists ~ 1 + high_debt_version + scenario + (1 | c | session)
## equals.exists ~ 1 + high_debt_version + scenario + (1 | c | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error
## sd(hashcodeexists_Intercept) 1.82 0.74
## sd(equalsexists_Intercept) 2.07 0.84
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.77 0.18
## l-95% CI u-95% CI Rhat
## sd(hashcodeexists_Intercept) 0.61 3.51 1.00
## sd(equalsexists_Intercept) 0.68 4.01 1.00
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.29 0.98 1.00
## Bulk_ESS Tail_ESS
## sd(hashcodeexists_Intercept) 1901 1611
## sd(equalsexists_Intercept) 1716 1434
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 1984 1927
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## hashcodeexists_Intercept 0.24 0.56 -0.85 1.36 1.00
## equalsexists_Intercept 0.35 0.56 -0.73 1.48 1.00
## hashcodeexists_high_debt_versionfalse -0.14 0.63 -1.41 1.11 1.00
## hashcodeexists_scenariotickets -0.94 0.62 -2.21 0.25 1.00
## equalsexists_high_debt_versionfalse -0.36 0.63 -1.65 0.91 1.00
## equalsexists_scenariotickets -0.76 0.64 -2.06 0.46 1.00
## Bulk_ESS Tail_ESS
## hashcodeexists_Intercept 5727 2957
## equalsexists_Intercept 5651 2838
## hashcodeexists_high_debt_versionfalse 8028 2888
## hashcodeexists_scenariotickets 7033 2659
## equalsexists_high_debt_versionfalse 7668 2569
## equalsexists_scenariotickets 7562 2894
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(utility1)
## $session
## , , hashcodeexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.1495391 1.101614 -2.0657952 2.3817488
## 6033d90a5af2c702367b3a96 0.1418125 1.052131 -1.9618638 2.2479425
## 6034fc165af2c702367b3a98 0.1454069 1.056200 -1.9944470 2.3234030
## 603500725af2c702367b3a99 -1.9075629 1.570466 -5.8157898 0.4342054
## 603f97625af2c702367b3a9d -1.9199528 1.585006 -5.5869728 0.5168779
## 603fd5d95af2c702367b3a9e 0.1650018 1.014490 -1.8762025 2.2177800
## 60409b7b5af2c702367b3a9f 0.1592306 1.034768 -1.9542895 2.2945550
## 604b82b5a7718fbed181b336 2.1697601 1.559108 -0.1647747 5.7706030
## 6050c1bf856f36729d2e5218 -1.8759672 1.580014 -5.8061553 0.4745505
## 6050e1e7856f36729d2e5219 0.1720244 1.037983 -1.9036215 2.2944260
## 6055fdc6856f36729d2e521b 2.1543313 1.580852 -0.1684798 5.8957125
## 60589862856f36729d2e521f 0.1758781 1.047749 -2.0027763 2.2773588
## 605afa3a856f36729d2e5222 -1.9026004 1.585752 -5.8890733 0.4286742
## 605c8bc6856f36729d2e5223 -1.9203595 1.556560 -5.6682102 0.3767893
## 605f3f2d856f36729d2e5224 -1.9319871 1.601526 -5.9310105 0.4116877
## 605f46c3856f36729d2e5225 2.1563512 1.570433 -0.1838777 5.8146085
## 60605337856f36729d2e5226 -1.9083993 1.574336 -5.7176615 0.4587586
## 60609ae6856f36729d2e5228 -1.9133694 1.581651 -5.6113362 0.4764795
## 6061ce91856f36729d2e522e 0.1618948 1.055682 -1.9077032 2.3328713
## 6061f106856f36729d2e5231 0.7712594 1.113690 -1.3606772 3.0116810
## 6068ea9f856f36729d2e523e -1.9064301 1.579020 -5.6468015 0.3861303
## 6075ab05856f36729d2e5247 2.1655003 1.566090 -0.2158562 5.8733145
##
## , , equalsexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.1644478 1.183392 -2.1772805 2.5458043
## 6033d90a5af2c702367b3a96 0.1529534 1.157938 -2.2346032 2.5427800
## 6034fc165af2c702367b3a98 0.1716722 1.121190 -2.0048438 2.4441618
## 603500725af2c702367b3a99 -2.1859511 1.781654 -6.4992880 0.4408563
## 603f97625af2c702367b3a9d -2.1835355 1.722076 -6.2436810 0.4526393
## 603fd5d95af2c702367b3a9e 0.1445457 1.121391 -2.0600817 2.4836240
## 60409b7b5af2c702367b3a9f 0.1761629 1.173377 -2.1773458 2.5471893
## 604b82b5a7718fbed181b336 2.4295994 1.708255 -0.2040509 6.2307995
## 6050c1bf856f36729d2e5218 -2.1690286 1.826635 -6.5069390 0.6290672
## 6050e1e7856f36729d2e5219 0.1601141 1.163231 -2.2141572 2.4459323
## 6055fdc6856f36729d2e521b 2.4254637 1.751672 -0.1856575 6.6280155
## 60589862856f36729d2e521f 0.1587501 1.175027 -2.2030072 2.4443133
## 605afa3a856f36729d2e5222 -2.1562306 1.781969 -6.3928430 0.4465869
## 605c8bc6856f36729d2e5223 -2.1962799 1.763735 -6.4953030 0.4444716
## 605f3f2d856f36729d2e5224 -2.1773336 1.775335 -6.6135240 0.4116100
## 605f46c3856f36729d2e5225 2.4579763 1.804822 -0.1845521 6.7306973
## 60605337856f36729d2e5226 -2.1551328 1.734371 -6.2536398 0.4583326
## 60609ae6856f36729d2e5228 -2.1715045 1.754398 -6.1679705 0.4160527
## 6061ce91856f36729d2e522e 0.1509883 1.166842 -2.2763835 2.5121533
## 6061f106856f36729d2e5231 1.5493903 1.461876 -0.7745020 4.9373700
## 6068ea9f856f36729d2e523e -2.1955698 1.772856 -6.3949013 0.4300521
## 6075ab05856f36729d2e5247 2.4234707 1.710659 -0.1707854 6.5318583
plot(utility1, ask = FALSE)
pp_check(utility1, nsamples = 200, type = "bars", resp = "equalsexists")
pp_check(utility1, nsamples = 200, type = "bars", resp = "hashcodeexists")
We select the best performing model with two variables.
utility2 <- brm(
"mvbind(hashcode.exists, equals.exists) ~ 1 + high_debt_version + scenario + work_experience_java.s + (1 | c | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 0.5), class = "Intercept"),
prior(exponential(1), class = "sd", resp = "equalsexists"),
prior(exponential(1), class = "sd", resp = "hashcodeexists"),
prior(lkj(2), class = "L")
),
family = bernoulli(),
data = as.data.frame(d.both_completed),
file = "fits/utility2",
file_refit = "on_change",
seed = 20210421
)
summary(utility2)
## Family: MV(bernoulli, bernoulli)
## Links: mu = logit
## mu = logit
## Formula: hashcode.exists ~ 1 + high_debt_version + scenario + work_experience_java.s + (1 | c | session)
## equals.exists ~ 1 + high_debt_version + scenario + work_experience_java.s + (1 | c | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error
## sd(hashcodeexists_Intercept) 1.58 0.79
## sd(equalsexists_Intercept) 1.77 0.81
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.70 0.26
## l-95% CI u-95% CI Rhat
## sd(hashcodeexists_Intercept) 0.22 3.36 1.00
## sd(equalsexists_Intercept) 0.38 3.60 1.00
## cor(hashcodeexists_Intercept,equalsexists_Intercept) -0.02 0.97 1.00
## Bulk_ESS Tail_ESS
## sd(hashcodeexists_Intercept) 1110 943
## sd(equalsexists_Intercept) 1203 957
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 1263 918
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## hashcodeexists_Intercept 0.17 0.58 -0.95 1.34 1.00
## equalsexists_Intercept 0.27 0.59 -0.86 1.44 1.00
## hashcodeexists_high_debt_versionfalse -0.11 0.64 -1.36 1.15 1.00
## hashcodeexists_scenariotickets -0.93 0.63 -2.18 0.27 1.00
## hashcodeexists_work_experience_java.s -0.74 0.55 -1.84 0.30 1.00
## equalsexists_high_debt_versionfalse -0.32 0.63 -1.60 0.90 1.00
## equalsexists_scenariotickets -0.74 0.66 -2.02 0.50 1.00
## equalsexists_work_experience_java.s -0.83 0.57 -2.02 0.28 1.00
## Bulk_ESS Tail_ESS
## hashcodeexists_Intercept 5357 3126
## equalsexists_Intercept 6154 2895
## hashcodeexists_high_debt_versionfalse 7477 2495
## hashcodeexists_scenariotickets 5718 3086
## hashcodeexists_work_experience_java.s 4485 2886
## equalsexists_high_debt_versionfalse 9514 2995
## equalsexists_scenariotickets 8201 2921
## equalsexists_work_experience_java.s 3744 2948
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(utility2)
## $session
## , , hashcodeexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.06414153 1.018728 -2.1802553 2.0425843
## 6033d90a5af2c702367b3a96 -0.06228974 1.037465 -2.1523095 2.0169370
## 6034fc165af2c702367b3a98 -0.05451971 1.004478 -2.1739220 1.9307800
## 603500725af2c702367b3a99 -1.75952936 1.545345 -5.5436135 0.4106344
## 603f97625af2c702367b3a9d -1.82483758 1.596653 -5.7513800 0.3272398
## 603fd5d95af2c702367b3a9e -0.04314116 1.017808 -2.1012703 2.0440707
## 60409b7b5af2c702367b3a9f -0.06027824 1.030347 -2.1702100 2.0922955
## 604b82b5a7718fbed181b336 1.75756219 1.525833 -0.3845553 5.4097235
## 6050c1bf856f36729d2e5218 -1.68136108 1.559049 -5.4093442 0.5182542
## 6050e1e7856f36729d2e5219 0.12551502 1.041856 -2.0139910 2.2789308
## 6055fdc6856f36729d2e521b 1.66181780 1.502336 -0.4447387 5.1784453
## 60589862856f36729d2e521f 0.69938070 1.076683 -1.3366230 2.9823383
## 605afa3a856f36729d2e5222 -1.06142463 1.487821 -4.6918608 1.3131368
## 605c8bc6856f36729d2e5223 -1.39344827 1.510121 -4.9893980 0.7978276
## 605f3f2d856f36729d2e5224 -0.79829855 1.595320 -4.6358515 1.8638480
## 605f46c3856f36729d2e5225 1.72599123 1.553784 -0.4507717 5.6542328
## 60605337856f36729d2e5226 -1.64878503 1.557853 -5.4335367 0.5401537
## 60609ae6856f36729d2e5228 -1.78188023 1.581559 -5.5542812 0.4303594
## 6061ce91856f36729d2e522e -0.03292497 1.029498 -2.1098167 2.0241515
## 6061f106856f36729d2e5231 0.49733829 1.083213 -1.5319392 2.8252305
## 6068ea9f856f36729d2e523e -1.58408785 1.530658 -5.2671352 0.5749691
## 6075ab05856f36729d2e5247 1.78764154 1.505038 -0.3053825 5.3674997
##
## , , equalsexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.08875290 1.092509 -2.3035357 2.1644590
## 6033d90a5af2c702367b3a96 -0.08420082 1.077556 -2.2617817 2.0904293
## 6034fc165af2c702367b3a98 -0.08084843 1.103634 -2.3669210 2.1588293
## 603500725af2c702367b3a99 -2.02161557 1.678061 -5.9924155 0.3258256
## 603f97625af2c702367b3a9d -2.03206674 1.699322 -6.0729250 0.4051052
## 603fd5d95af2c702367b3a9e -0.05098973 1.094485 -2.2803605 2.1252000
## 60409b7b5af2c702367b3a9f -0.08789528 1.093609 -2.3340423 2.1074372
## 604b82b5a7718fbed181b336 1.98338222 1.679973 -0.5307758 6.1034415
## 6050c1bf856f36729d2e5218 -1.88417480 1.687105 -5.9334963 0.5522080
## 6050e1e7856f36729d2e5219 0.13780417 1.082933 -2.0077320 2.4185840
## 6055fdc6856f36729d2e521b 1.82788170 1.592712 -0.4464406 5.7515735
## 60589862856f36729d2e521f 0.77371894 1.191779 -1.3893148 3.3525570
## 605afa3a856f36729d2e5222 -1.20825728 1.662282 -5.2386255 1.4352398
## 605c8bc6856f36729d2e5223 -1.60215455 1.696583 -5.6755522 0.8524726
## 605f3f2d856f36729d2e5224 -0.91594129 1.799877 -5.0445683 2.1152215
## 605f46c3856f36729d2e5225 1.89197045 1.634677 -0.4970580 5.7841832
## 60605337856f36729d2e5226 -1.82587306 1.574009 -5.5426032 0.4833384
## 60609ae6856f36729d2e5228 -2.02576476 1.648651 -5.9829242 0.3359967
## 6061ce91856f36729d2e522e -0.07333596 1.128521 -2.4198813 2.1769150
## 6061f106856f36729d2e5231 1.17455258 1.393858 -1.0267760 4.5866550
## 6068ea9f856f36729d2e523e -1.76911435 1.651877 -5.6901235 0.6612151
## 6075ab05856f36729d2e5247 2.00057609 1.602869 -0.3293568 5.8658958
plot(utility2, ask = FALSE)
pp_check(utility2, nsamples = 200, type = "bars", resp = "equalsexists")
pp_check(utility2, nsamples = 200, type = "bars", resp = "hashcodeexists")
We select the best performing model with three variables.
utility3 <- brm(
"mvbind(hashcode.exists, equals.exists) ~ 1 + high_debt_version + scenario + work_experience_java.s + workplace_pair_programming + (1 | c | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 0.5), class = "Intercept"),
prior(exponential(1), class = "sd", resp = "equalsexists"),
prior(exponential(1), class = "sd", resp = "hashcodeexists"),
prior(lkj(2), class = "L")
),
family = bernoulli(),
data = as.data.frame(d.both_completed),
file = "fits/utility3",
file_refit = "on_change",
seed = 20210421
)
summary(utility3)
## Family: MV(bernoulli, bernoulli)
## Links: mu = logit
## mu = logit
## Formula: hashcode.exists ~ 1 + high_debt_version + scenario + work_experience_java.s + workplace_pair_programming + (1 | c | session)
## equals.exists ~ 1 + high_debt_version + scenario + work_experience_java.s + workplace_pair_programming + (1 | c | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error
## sd(hashcodeexists_Intercept) 1.35 0.77
## sd(equalsexists_Intercept) 1.57 0.84
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.62 0.32
## l-95% CI u-95% CI Rhat
## sd(hashcodeexists_Intercept) 0.11 3.12 1.00
## sd(equalsexists_Intercept) 0.16 3.51 1.01
## cor(hashcodeexists_Intercept,equalsexists_Intercept) -0.28 0.96 1.01
## Bulk_ESS Tail_ESS
## sd(hashcodeexists_Intercept) 770 634
## sd(equalsexists_Intercept) 895 1073
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 1017 1055
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI
## hashcodeexists_Intercept 0.77 0.76 -0.73
## equalsexists_Intercept 0.77 0.78 -0.72
## hashcodeexists_high_debt_versionfalse -0.11 0.61 -1.32
## hashcodeexists_scenariotickets -0.91 0.62 -2.17
## hashcodeexists_work_experience_java.s -0.70 0.55 -1.86
## hashcodeexists_workplace_pair_programmingfalse -0.86 0.75 -2.28
## equalsexists_high_debt_versionfalse -0.32 0.63 -1.57
## equalsexists_scenariotickets -0.72 0.64 -2.00
## equalsexists_work_experience_java.s -0.80 0.57 -1.95
## equalsexists_workplace_pair_programmingfalse -0.73 0.77 -2.24
## u-95% CI Rhat Bulk_ESS Tail_ESS
## hashcodeexists_Intercept 2.24 1.00 6349 3160
## equalsexists_Intercept 2.35 1.00 6312 2804
## hashcodeexists_high_debt_versionfalse 1.08 1.00 7596 2759
## hashcodeexists_scenariotickets 0.29 1.00 6441 2582
## hashcodeexists_work_experience_java.s 0.31 1.00 4538 2955
## hashcodeexists_workplace_pair_programmingfalse 0.61 1.00 5843 3189
## equalsexists_high_debt_versionfalse 0.90 1.00 6228 2925
## equalsexists_scenariotickets 0.51 1.00 6756 2731
## equalsexists_work_experience_java.s 0.26 1.00 5180 3247
## equalsexists_workplace_pair_programmingfalse 0.77 1.00 4843 2808
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(utility3)
## $session
## , , hashcodeexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.05339216 0.9353788 -1.9074077 1.9952703
## 6033d90a5af2c702367b3a96 0.06342651 0.9609684 -1.9167342 2.0613248
## 6034fc165af2c702367b3a98 0.07377534 0.9247527 -1.7938502 1.9770695
## 603500725af2c702367b3a99 -1.40042605 1.4482547 -4.9892163 0.5598102
## 603f97625af2c702367b3a9d -1.37453324 1.4170896 -4.8784553 0.4743743
## 603fd5d95af2c702367b3a9e -0.28154708 0.9507231 -2.2664608 1.5834170
## 60409b7b5af2c702367b3a9f 0.06749034 0.9968167 -1.9414058 2.1647618
## 604b82b5a7718fbed181b336 1.23786659 1.4410780 -0.6986493 4.7967040
## 6050c1bf856f36729d2e5218 -1.28920351 1.4162785 -4.7085060 0.6332899
## 6050e1e7856f36729d2e5219 -0.12781467 0.9608421 -2.0882290 1.8417218
## 6055fdc6856f36729d2e521b 1.18470601 1.4600368 -0.8586397 4.7420573
## 60589862856f36729d2e521f 0.65967253 1.0566028 -1.2224262 3.0229148
## 605afa3a856f36729d2e5222 -0.86150665 1.3980691 -4.3318265 1.2916418
## 605c8bc6856f36729d2e5223 -1.04965602 1.4039104 -4.3662610 0.9116516
## 605f3f2d856f36729d2e5224 -0.63153616 1.5066127 -4.2481653 1.8892315
## 605f46c3856f36729d2e5225 1.23347668 1.4246549 -0.7505109 4.6499080
## 60605337856f36729d2e5226 -1.60883007 1.5113296 -5.2230715 0.3726654
## 60609ae6856f36729d2e5228 -1.39583991 1.4576114 -4.8359055 0.5874207
## 6061ce91856f36729d2e522e 0.06511282 0.9385306 -1.8856417 1.9949340
## 6061f106856f36729d2e5231 0.48572874 1.0050382 -1.3388120 2.7551813
## 6068ea9f856f36729d2e523e -1.22218462 1.4197485 -4.6692025 0.6782865
## 6075ab05856f36729d2e5247 1.59291944 1.4985030 -0.3641837 5.3211953
##
## , , equalsexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.04175473 1.012968 -2.0592650 2.1802043
## 6033d90a5af2c702367b3a96 0.01473031 1.076840 -2.2351950 2.2344443
## 6034fc165af2c702367b3a98 0.03791131 1.051466 -2.1699732 2.2024010
## 603500725af2c702367b3a99 -1.64274568 1.625425 -5.6443082 0.5527320
## 603f97625af2c702367b3a9d -1.60803054 1.598349 -5.3950772 0.6498862
## 603fd5d95af2c702367b3a9e -0.35010413 1.107881 -2.6804337 1.8065490
## 60409b7b5af2c702367b3a9f 0.04654250 1.093546 -2.1989622 2.2382900
## 604b82b5a7718fbed181b336 1.46637726 1.612612 -0.7710676 5.3702585
## 6050c1bf856f36729d2e5218 -1.52811542 1.577005 -5.3642630 0.6527798
## 6050e1e7856f36729d2e5219 -0.16207899 1.045941 -2.3997693 1.9268033
## 6055fdc6856f36729d2e521b 1.40017778 1.607288 -0.8816974 5.1299810
## 60589862856f36729d2e521f 0.76630553 1.162200 -1.2594828 3.2766073
## 605afa3a856f36729d2e5222 -1.01053071 1.609622 -5.0130038 1.4350593
## 605c8bc6856f36729d2e5223 -1.21658722 1.538378 -5.1305682 1.0582225
## 605f3f2d856f36729d2e5224 -0.73603953 1.689703 -4.8696968 2.0745458
## 605f46c3856f36729d2e5225 1.44509756 1.637319 -0.7762094 5.4378365
## 60605337856f36729d2e5226 -1.89509873 1.671504 -5.9895357 0.3471111
## 60609ae6856f36729d2e5228 -1.66419656 1.616327 -5.6497190 0.5901393
## 6061ce91856f36729d2e522e 0.07870477 1.035389 -2.0520995 2.2997058
## 6061f106856f36729d2e5231 1.14945847 1.379014 -0.8738288 4.4228398
## 6068ea9f856f36729d2e523e -1.42537237 1.572868 -5.3352937 0.7362256
## 6075ab05856f36729d2e5247 1.83049000 1.661671 -0.3482795 5.8782308
plot(utility3, ask = FALSE)
pp_check(utility3, nsamples = 200, type = "bars", resp = "equalsexists")
pp_check(utility3, nsamples = 200, type = "bars", resp = "hashcodeexists")
All candidate models look nice, canidate 1 performes better than all less complex models, we will proceed with: utility1
We will try a few different variations of the selected candidate model.
Some participants did only complete one scenario. Those has been excluded from the initial dataset to improve sampling of the models. We do however want to use all data we can and will therefore try to fit the model with the complete dataset.
utility1.all <- brm(
"mvbind(hashcode.exists, equals.exists) ~ 1 + high_debt_version + scenario + (1 | c | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 0.5), class = "Intercept"),
prior(exponential(1), class = "sd", resp = "equalsexists"),
prior(exponential(1), class = "sd", resp = "hashcodeexists"),
prior(lkj(2), class = "L")
),
family = bernoulli(),
data = as.data.frame(d.completed),
file = "fits/utility1.all",
file_refit = "on_change",
seed = 20210421
)
summary(utility1.all)
## Family: MV(bernoulli, bernoulli)
## Links: mu = logit
## mu = logit
## Formula: hashcode.exists ~ 1 + high_debt_version + scenario + (1 | c | session)
## equals.exists ~ 1 + high_debt_version + scenario + (1 | c | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error
## sd(hashcodeexists_Intercept) 2.19 0.81
## sd(equalsexists_Intercept) 2.43 0.88
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.84 0.13
## l-95% CI u-95% CI Rhat
## sd(hashcodeexists_Intercept) 0.87 4.05 1.00
## sd(equalsexists_Intercept) 1.01 4.49 1.00
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.51 0.98 1.00
## Bulk_ESS Tail_ESS
## sd(hashcodeexists_Intercept) 2159 1414
## sd(equalsexists_Intercept) 2696 2616
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 2520 2019
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## hashcodeexists_Intercept 0.27 0.60 -0.87 1.48 1.00
## equalsexists_Intercept 0.38 0.60 -0.80 1.54 1.00
## hashcodeexists_high_debt_versionfalse 0.10 0.62 -1.09 1.29 1.00
## hashcodeexists_scenariotickets -0.99 0.65 -2.27 0.25 1.00
## equalsexists_high_debt_versionfalse -0.12 0.62 -1.36 1.12 1.00
## equalsexists_scenariotickets -0.81 0.63 -2.07 0.40 1.00
## Bulk_ESS Tail_ESS
## hashcodeexists_Intercept 5589 3470
## equalsexists_Intercept 7043 2810
## hashcodeexists_high_debt_versionfalse 7818 2761
## hashcodeexists_scenariotickets 7878 3150
## equalsexists_high_debt_versionfalse 8048 3201
## equalsexists_scenariotickets 9099 3095
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(utility1.all)
## $session
## , , hashcodeexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -1.63507116 1.892982 -6.30618050 1.3916893
## 6033d69a5af2c702367b3a95 0.11698351 1.117664 -2.06815600 2.3622423
## 6033d90a5af2c702367b3a96 0.10557875 1.143949 -2.16366700 2.4286065
## 6034fc165af2c702367b3a98 0.09901828 1.127787 -2.19056525 2.3635573
## 603500725af2c702367b3a99 -2.39443603 1.792083 -6.74645625 0.2735826
## 603f84f15af2c702367b3a9b 2.16494404 1.789651 -0.63850405 6.3942285
## 603f97625af2c702367b3a9d -2.37513428 1.726646 -6.51788650 0.2571507
## 603fd5d95af2c702367b3a9e 0.12324595 1.173183 -2.26060800 2.5111830
## 60409b7b5af2c702367b3a9f 0.10311800 1.121689 -2.10786825 2.3664440
## 604b82b5a7718fbed181b336 2.53095087 1.753659 -0.11795640 6.7398465
## 604f1239a7718fbed181b33f -1.60570987 1.855346 -6.11314400 1.3948615
## 6050c1bf856f36729d2e5218 -2.40333664 1.813197 -6.79176575 0.3354273
## 6050e1e7856f36729d2e5219 0.09025062 1.139717 -2.25073800 2.3649410
## 6055fdc6856f36729d2e521b 2.53476733 1.788860 -0.16959898 6.9289595
## 60579f2a856f36729d2e521e 2.17951656 1.861783 -0.67131400 6.5407180
## 60589862856f36729d2e521f 0.06839683 1.110165 -2.19110775 2.3313238
## 605a30a7856f36729d2e5221 -1.62098744 1.913429 -6.21198675 1.4255213
## 605afa3a856f36729d2e5222 -2.40098522 1.770651 -6.67349875 0.2690176
## 605c8bc6856f36729d2e5223 -2.35610152 1.737013 -6.61177875 0.2141804
## 605f3f2d856f36729d2e5224 -2.44171772 1.823094 -6.86020750 0.2000404
## 605f46c3856f36729d2e5225 2.52801243 1.724243 -0.06979473 6.7309338
## 60605337856f36729d2e5226 -2.40515833 1.824547 -6.77280025 0.2942651
## 60609ae6856f36729d2e5228 -2.41803639 1.791529 -6.67614625 0.2591749
## 6061ce91856f36729d2e522e 0.11181118 1.089746 -2.02658700 2.2723295
## 6061f106856f36729d2e5231 0.85909221 1.202530 -1.37816950 3.4660483
## 60672faa856f36729d2e523c 2.18431089 1.814163 -0.62797077 6.4501045
## 6068ea9f856f36729d2e523e -2.38723912 1.746404 -6.50805825 0.2256618
## 606db69d856f36729d2e5243 1.72734520 1.800311 -1.18761625 5.8689435
## 6075ab05856f36729d2e5247 2.48849688 1.721436 -0.19531930 6.6422827
##
## , , equalsexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -1.79561850 2.008687 -6.42759900 1.3483705
## 6033d69a5af2c702367b3a95 0.08715194 1.209628 -2.39487300 2.4935023
## 6033d90a5af2c702367b3a96 0.10922341 1.207409 -2.27506750 2.5574778
## 6034fc165af2c702367b3a98 0.07328676 1.213422 -2.35450375 2.4586943
## 603500725af2c702367b3a99 -2.67638646 1.973043 -7.28155425 0.3134005
## 603f84f15af2c702367b3a9b 2.37407420 1.968000 -0.68766250 7.0593108
## 603f97625af2c702367b3a9d -2.62856779 1.903906 -6.97479500 0.2923130
## 603fd5d95af2c702367b3a9e 0.13119989 1.201552 -2.30171175 2.4859528
## 60409b7b5af2c702367b3a9f 0.07498060 1.208618 -2.37196650 2.4953018
## 604b82b5a7718fbed181b336 2.78286181 1.937668 -0.06703994 7.5114778
## 604f1239a7718fbed181b33f -1.80782836 2.013401 -6.55021300 1.3311308
## 6050c1bf856f36729d2e5218 -2.68261734 2.013123 -7.43707175 0.3225701
## 6050e1e7856f36729d2e5219 0.05047839 1.197898 -2.37299300 2.4567512
## 6055fdc6856f36729d2e521b 2.80350112 1.984032 -0.11487365 7.6911688
## 60579f2a856f36729d2e521e 2.40598014 2.016117 -0.63429630 7.0842768
## 60589862856f36729d2e521f 0.08110140 1.185959 -2.40785500 2.4200145
## 605a30a7856f36729d2e5221 -1.81822073 2.089635 -6.68750375 1.4377893
## 605afa3a856f36729d2e5222 -2.63130112 1.902151 -7.08459100 0.2059165
## 605c8bc6856f36729d2e5223 -2.65500875 1.897023 -7.22657000 0.1875890
## 605f3f2d856f36729d2e5224 -2.69970608 2.027798 -7.25769425 0.2086325
## 605f46c3856f36729d2e5225 2.77456357 1.854276 -0.03403104 7.2411338
## 60605337856f36729d2e5226 -2.68638839 1.942321 -7.42074000 0.3366272
## 60609ae6856f36729d2e5228 -2.67566957 1.974827 -7.49955375 0.2559838
## 6061ce91856f36729d2e522e 0.10306527 1.171709 -2.23173250 2.5197953
## 6061f106856f36729d2e5231 1.61108601 1.510602 -0.80123378 5.0916287
## 60672faa856f36729d2e523c 2.39482527 1.990461 -0.77332728 6.9769828
## 6068ea9f856f36729d2e523e -2.66163704 1.951323 -7.30756825 0.2090115
## 606db69d856f36729d2e5243 1.88917747 1.927497 -1.26545175 6.3491678
## 6075ab05856f36729d2e5247 2.75617816 1.937264 -0.13328502 7.4326588
plot(utility1.all, ask = FALSE)
pp_check(utility1.all, type = "bars", nsamples = 200, resp = "hashcodeexists")
pp_check(utility1.all, type = "bars", nsamples = 200, resp = "equalsexists")
As including all data points didn’t harm the model we will create this variant with all data points as well.
This variation includes work_experience_programming.s
predictors as it can give further insight into how experience play a factor in the effect we try to measure. This is especially important as our sampling shewed towards containing less experienced developer than the population at large.
utility1.all.exp <- brm(
"mvbind(hashcode.exists, equals.exists) ~ 1 + high_debt_version + scenario + work_experience_programming.s + (1 | c | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(0, 0.5), class = "Intercept"),
prior(exponential(1), class = "sd", resp = "equalsexists"),
prior(exponential(1), class = "sd", resp = "hashcodeexists"),
prior(lkj(2), class = "L")
),
family = bernoulli(),
data = as.data.frame(d.completed),
file = "fits/utility1.all.exp",
file_refit = "on_change",
seed = 20210421
)
summary(utility1.all.exp)
## Family: MV(bernoulli, bernoulli)
## Links: mu = logit
## mu = logit
## Formula: hashcode.exists ~ 1 + high_debt_version + scenario + work_experience_programming.s + (1 | c | session)
## equals.exists ~ 1 + high_debt_version + scenario + work_experience_programming.s + (1 | c | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error
## sd(hashcodeexists_Intercept) 2.19 0.82
## sd(equalsexists_Intercept) 2.42 0.89
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.84 0.13
## l-95% CI u-95% CI Rhat
## sd(hashcodeexists_Intercept) 0.90 4.03 1.00
## sd(equalsexists_Intercept) 1.00 4.50 1.00
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 0.52 0.98 1.00
## Bulk_ESS Tail_ESS
## sd(hashcodeexists_Intercept) 2373 2497
## sd(equalsexists_Intercept) 2989 2746
## cor(hashcodeexists_Intercept,equalsexists_Intercept) 2823 2795
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI
## hashcodeexists_Intercept 0.27 0.59 -0.86
## equalsexists_Intercept 0.39 0.63 -0.82
## hashcodeexists_high_debt_versionfalse 0.13 0.65 -1.14
## hashcodeexists_scenariotickets -1.02 0.62 -2.23
## hashcodeexists_work_experience_programming.s -0.47 0.54 -1.56
## equalsexists_high_debt_versionfalse -0.11 0.65 -1.42
## equalsexists_scenariotickets -0.85 0.66 -2.16
## equalsexists_work_experience_programming.s -0.55 0.55 -1.64
## u-95% CI Rhat Bulk_ESS Tail_ESS
## hashcodeexists_Intercept 1.45 1.00 5946 3173
## equalsexists_Intercept 1.66 1.00 6217 3130
## hashcodeexists_high_debt_versionfalse 1.39 1.00 8529 2502
## hashcodeexists_scenariotickets 0.13 1.00 7455 3134
## hashcodeexists_work_experience_programming.s 0.60 1.00 4078 3258
## equalsexists_high_debt_versionfalse 1.10 1.00 6889 2874
## equalsexists_scenariotickets 0.43 1.00 8711 3330
## equalsexists_work_experience_programming.s 0.51 1.00 4014 2905
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(utility1.all.exp)
## $session
## , , hashcodeexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -1.73726574 1.850235 -6.0054868 1.1664915
## 6033d69a5af2c702367b3a95 -0.07501917 1.142924 -2.3937070 2.1676118
## 6033d90a5af2c702367b3a96 -0.11120847 1.175315 -2.5210838 2.2228470
## 6034fc165af2c702367b3a98 -0.02545292 1.124571 -2.1837413 2.2234540
## 603500725af2c702367b3a99 -2.58349553 1.841753 -6.9493422 0.1670257
## 603f84f15af2c702367b3a9b 2.09092821 1.874331 -0.7836159 6.3820920
## 603f97625af2c702367b3a9d -2.55861005 1.782973 -6.7102112 0.1542549
## 603fd5d95af2c702367b3a9e -0.09438248 1.148240 -2.4560780 2.1804795
## 60409b7b5af2c702367b3a9f -0.08920310 1.140049 -2.4481222 2.1520403
## 604b82b5a7718fbed181b336 2.43662248 1.828475 -0.1960311 6.7447975
## 604f1239a7718fbed181b33f -1.62088197 1.832896 -5.7641798 1.2296258
## 6050c1bf856f36729d2e5218 -2.36341803 1.786751 -6.7395032 0.2928369
## 6050e1e7856f36729d2e5219 0.01035557 1.119240 -2.2184438 2.2329983
## 6055fdc6856f36729d2e521b 2.42098346 1.713777 -0.2284155 6.5208755
## 60579f2a856f36729d2e521e 2.11986228 1.816354 -0.6438962 6.3972208
## 60589862856f36729d2e521f 0.89805500 1.435625 -1.8147772 3.9276145
## 605a30a7856f36729d2e5221 -1.56788143 1.791029 -5.5955015 1.3459510
## 605afa3a856f36729d2e5222 -1.95766327 1.811595 -6.2220730 0.8668285
## 605c8bc6856f36729d2e5223 -2.33118506 1.840058 -6.6222260 0.4493343
## 605f3f2d856f36729d2e5224 -1.71900173 2.017291 -6.3354385 1.6468785
## 605f46c3856f36729d2e5225 2.38657652 1.789617 -0.3028861 6.6586608
## 60605337856f36729d2e5226 -2.51298135 1.777455 -6.6605523 0.1669624
## 60609ae6856f36729d2e5228 -2.54256260 1.768304 -6.7464245 0.1935576
## 6061ce91856f36729d2e522e -0.06751999 1.130940 -2.4078857 2.1627648
## 6061f106856f36729d2e5231 0.67074929 1.199200 -1.6264735 3.2282053
## 60672faa856f36729d2e523c 2.04980102 1.858741 -0.9834708 6.3044820
## 6068ea9f856f36729d2e523e -2.44367709 1.834145 -6.9686367 0.1988493
## 606db69d856f36729d2e5243 2.04686375 1.838491 -0.8876078 6.2862668
## 6075ab05856f36729d2e5247 2.43749437 1.765096 -0.1928405 6.5367025
##
## , , equalsexists_Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -1.91476099 1.982069 -6.7222035 1.23407600
## 6033d69a5af2c702367b3a95 -0.11301551 1.221362 -2.5549070 2.27502850
## 6033d90a5af2c702367b3a96 -0.15455443 1.248765 -2.7564437 2.29904850
## 6034fc165af2c702367b3a98 -0.05916545 1.210230 -2.4991290 2.35414900
## 603500725af2c702367b3a99 -2.87249257 1.964693 -7.4369060 0.09599387
## 603f84f15af2c702367b3a9b 2.31260459 2.021844 -0.8717310 7.07486100
## 603f97625af2c702367b3a9d -2.83506042 1.975320 -7.6104060 0.07877289
## 603fd5d95af2c702367b3a9e -0.12992526 1.212107 -2.5697180 2.25112900
## 60409b7b5af2c702367b3a9f -0.12831745 1.245288 -2.6871135 2.34698450
## 604b82b5a7718fbed181b336 2.67009127 1.948842 -0.1935828 7.45404600
## 604f1239a7718fbed181b33f -1.83708278 1.973742 -6.5296635 1.40352550
## 6050c1bf856f36729d2e5218 -2.59760845 1.981022 -7.5699527 0.32565728
## 6050e1e7856f36729d2e5219 0.01036374 1.215124 -2.3536055 2.40768600
## 6055fdc6856f36729d2e521b 2.64694122 1.942273 -0.2579814 7.44606875
## 60579f2a856f36729d2e521e 2.32208559 2.003037 -0.7434413 6.94688575
## 60589862856f36729d2e521f 0.98472210 1.532857 -2.0225178 4.13727850
## 605a30a7856f36729d2e5221 -1.74195612 1.988721 -6.4471510 1.27551875
## 605afa3a856f36729d2e5222 -2.16076667 2.004333 -7.0338198 1.01442925
## 605c8bc6856f36729d2e5223 -2.60203570 2.055091 -7.6183605 0.40430575
## 605f3f2d856f36729d2e5224 -1.87330108 2.167104 -6.7709023 1.81447100
## 605f46c3856f36729d2e5225 2.62899663 1.910689 -0.2310519 7.18388975
## 60605337856f36729d2e5226 -2.74574974 1.918039 -7.4132087 0.17898775
## 60609ae6856f36729d2e5228 -2.79265421 1.932317 -7.4019770 0.07623070
## 6061ce91856f36729d2e522e -0.09357645 1.172657 -2.4446865 2.19015725
## 6061f106856f36729d2e5231 1.41597563 1.536149 -0.9812703 4.93050825
## 60672faa856f36729d2e523c 2.23621482 2.020576 -0.9203488 6.96052000
## 6068ea9f856f36729d2e523e -2.67769108 1.900880 -7.1141808 0.20804925
## 606db69d856f36729d2e5243 2.28035907 2.018021 -0.9425286 6.92073925
## 6075ab05856f36729d2e5247 2.68217392 1.964357 -0.2213006 7.33624825
loo(
utility1.all,
utility1.all.exp
)
## Output of model 'utility1.all':
##
## Computed from 4000 by 51 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.8 6.9
## p_loo 31.2 4.0
## looic 125.7 13.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 21 41.2% 657
## (0.5, 0.7] (ok) 15 29.4% 275
## (0.7, 1] (bad) 15 29.4% 42
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'utility1.all.exp':
##
## Computed from 4000 by 51 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.2 7.3
## p_loo 32.6 4.3
## looic 128.4 14.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 17 33.3% 865
## (0.5, 0.7] (ok) 17 33.3% 251
## (0.7, 1] (bad) 16 31.4% 29
## (1, Inf) (very bad) 1 2.0% 46
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## utility1.all 0.0 0.0
## utility1.all.exp -1.3 1.5
plot(utility1.all.exp, ask = FALSE)
pp_check(utility1.all.exp, type = "bars", nsamples = 200, resp = "hashcodeexists")
pp_check(utility1.all.exp, type = "bars", nsamples = 200, resp = "equalsexists")
This means that our final model, with all data points and experience predictors, is utility1.all.exp
To begin interpreting the model we look at how it’s parameters were estimated. As our research is focused on how the outcome of the model is effected we will mainly analyze the \(\beta\) parameters.
mcmc_areas(utility1.all.exp, pars = c("b_equalsexists_high_debt_versionfalse", "b_equalsexists_work_experience_programming.s", "b_equalsexists_scenariotickets"), prob = 0.95) + scale_y_discrete() +
scale_y_discrete(labels=c("High debt version: false", "Professional programming experience", "Tickets scenario")) +
ggtitle("Beta parameters densities for equals implemntation", subtitle = "Shaded region marks 95% of the density. Line marks the median")
mcmc_areas(utility1.all.exp, pars = c("b_hashcodeexists_high_debt_versionfalse", "b_hashcodeexists_work_experience_programming.s", "b_hashcodeexists_scenariotickets"), prob = 0.95) + scale_y_discrete() +
scale_y_discrete(labels=c("High debt version: false", "Professional programming experience", "Tickets scenario")) +
ggtitle("Beta parameters densities for hashcode implemntation", subtitle = "Shaded region marks 95% of the density. Line marks the median")
When we look at effect size we will look at both outcomes combined
scale_programming_experience <- function(x) {
(x - mean(d.completed$work_experience_programming))/ sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}
post_settings <- expand.grid(
high_debt_version = c("false", "true"),
session = NA,
scenario = c("tickets", "booking"),
work_experience_programming.s = sapply(c(0, 3, 10, 25, 40), scale_programming_experience)
)
post <- posterior_predict(utility1.all.exp, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id", "model")) %>%
left_join(
rowid_to_column(post_settings, var= "settings_id"),
by = "settings_id"
) %>%
mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
select(
estimate,
high_debt_version,
work_experience_programming,
scenario,
model
)
post %>%
mutate_at("high_debt_version",
function(x) case_when(
x == "false" ~ "Low debt",
x == "true" ~ "High debt"
)) %>%
mutate_at("estimate",
function(x) case_when(
x == 1 ~ "Implemented",
x == 0 ~ "Not implemented"
)) %>%
ggplot(aes(high_debt_version)) +
geom_bar(aes(fill = estimate), position = "fill") +
facet_grid(rows = vars(work_experience_programming)) +
scale_fill_manual("Legend", values = c("lightblue", "darkblue")) +
labs(title = "Utility methods implementation / programming experience") +
xlab("Debt version") +
ylab("Ratio of utility methods implementation")
post %>%
mutate_at("high_debt_version",
function(x) case_when(
x == "false" ~ "Low debt",
x == "true" ~ "High debt"
)) %>%
mutate_at("estimate",
function(x) case_when(
x == 1 ~ "Implemented",
x == 0 ~ "Not implemented"
)) %>%
ggplot(aes(high_debt_version)) +
geom_bar(aes(fill = estimate), position = "fill") +
facet_grid(rows = vars(scenario)) +
scale_fill_manual("Legend", values = c("lightblue", "darkblue")) +
labs(title = "Utility methods implementation / Scenario") +
xlab("Debt version") +
ylab("Ratio of utility methods implementation")
As the effect is neglectable we will not compute any specific probabilities.