It looks like there is a slightly higher rate of negative ratings for the high debt group, but also an even smaller increase in positive ratings.
# Recode the Likert responses and debt-version flags into readable factor
# labels, then plot the rating distribution for each debt group.
likert.data <- d.both_completed %>%
  select(high_debt_version, quality_post_task) %>%
  mutate(
    quality_post_task = revalue(quality_post_task, c(
      "-3" = "Very Bad",
      "-2" = "Bad",
      "-1" = "Somewhat Bad",
      "0" = "Neutral",
      "1" = "Somewhat Good",
      "2" = "Good",
      "3" = "Very Good"
    )),
    high_debt_version = revalue(high_debt_version, c(
      "true" = "High Debt",
      "false" = "Low Debt"
    ))
  )

# One facet row per debt group; fixed breaks keep the panels comparable.
ggplot(likert.data, aes(x = quality_post_task)) +
  geom_bar(fill = "Light Blue") +
  facet_grid(rows = vars(high_debt_version)) +
  scale_y_continuous(
    limits = NULL,
    breaks = c(2, 4, 6, 8),
    labels = c("2", "4", "6", "8")
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
As the data is collected from a likert scale we will use a cumulative family, indicating that each level on the scale is an incremental step. This model is also able to fit the data well.
We include high_debt_version
as a predictor in our model
as this variable represents the very effect we want to measure. We also
include a varying intercept for each individual to prevent the model
from learning too much from single participants with extreme
measurements.
We iterate over the model until we have sane priors.
# Base ordinal (cumulative-logit) model for self-rated quality.
# A varying intercept per participant (session) keeps single extreme
# participants from dominating the fit. The "b" prior is commented out
# because the base formula has no population-level predictors; model
# extensions pass their own beta priors explicitly.
own_quality.with <- extendable_model(
  base_name = "own_quality",
  base_formula = "quality_post_task ~ 1 + (1 | session)",
  base_priors = c(
    prior(normal(0, 2.5), class = "Intercept"),
    #prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),
  data = d.both_completed,
  base_control = list(adapt_delta = 0.95)
)

# Inspect the declared priors and the prior predictive distribution
# before conditioning on the data.
prior_summary(own_quality.with(only_priors = TRUE))
prior_summary(own_quality.with(sample_prior = "only"))
# 'nsamples' is deprecated in recent brms versions; 'ndraws' replaces it.
pp_check(own_quality.with(sample_prior = "only"), ndraws = 200, type = "bars")
We choose a beta parameter prior allowing the beta parameter to account for 100% of the effect while remaining skeptical of such strong effects from the beta parameter.
# Prior-predictive simulation of the beta prior's influence: for each
# draw, compare the response probability with and without the beta term,
# expressed as a percentage difference.
n.draws <- 1000
draw.intercept <- rnorm(n.draws, 0, 2.5)
draw.beta <- rnorm(n.draws, 0, 1)
prior.effect.pct <- (plogis(draw.intercept + draw.beta) / plogis(draw.intercept) * 100) - 100

# Visualize the implied distribution of relative effects.
ggplot(data.frame(x = prior.effect.pct), aes(x)) +
  geom_density() +
  xlim(-150, 150) +
  labs(
    title = "Beta parameter prior influence",
    x = "Estimate with beta as % of estimate without beta",
    y = "Density"
  )
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
# Posterior predictive check against the observed rating distribution.
# 'nsamples' is deprecated in recent brms versions (the rendered warning
# below confirms this); 'ndraws' is the replacement argument.
pp_check(own_quality.with(), ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
# Numerical summary: check Rhat ~ 1 and effective sample sizes.
summary(own_quality.with())
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + (1 | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.51 0.38 0.02 1.43 1.00 1589 2260
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept[1] -3.24 0.73 -4.83 -1.95 1.00 3196 2447
## Intercept[2] -0.84 0.36 -1.62 -0.19 1.00 4063 3480
## Intercept[3] 1.65 0.42 0.89 2.51 1.00 4608 3463
## Intercept[4] 2.38 0.53 1.46 3.54 1.00 5464 3207
## Intercept[5] 3.32 0.73 2.07 4.88 1.00 6403 3543
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for all parameters; ask = FALSE renders every
# page without prompting.
plot(own_quality.with(), ask = FALSE)
# Default prior for the monotonic (mo) education-level predictor used in
# the model extensions below.
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
# Default prior for population-level (beta) coefficients in extensions.
beta_prior <- prior(normal(0, 1), class = "b")
We use loo
to check some possible extensions on the
model.
# PSIS-LOO comparison: does adding variable-naming quality measures
# improve out-of-sample predictive performance over the base model?
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("var_names_new_good.ratio", beta_prior),
own_quality.with("var_names_copied_good.ratio", beta_prior),
own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior)
)
# elpd differences between the compared models (best model first).
loo_result[2]
## $diffs
## elpd_diff
## own_quality.with() 0.0
## own_quality.with("var_names_copied_good.ratio", beta_prior) -0.2
## own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior) -0.4
## own_quality.with("var_names_new_good.ratio", beta_prior) -0.4
## se_diff
## own_quality.with() 0.0
## own_quality.with("var_names_copied_good.ratio", beta_prior) 0.7
## own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior) 0.7
## own_quality.with("var_names_new_good.ratio", beta_prior) 0.2
# Per-model LOO estimates and Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 901
## (0.5, 0.7] (ok) 7 15.9% 1126
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("var_names_new_good.ratio", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.6 6.8
## p_loo 8.7 1.7
## looic 125.3 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 40 90.9% 1167
## (0.5, 0.7] (ok) 4 9.1% 380
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("var_names_copied_good.ratio", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.5 6.8
## p_loo 8.7 1.7
## looic 124.9 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1266
## (0.5, 0.7] (ok) 5 11.4% 1037
## (0.7, 1] (bad) 1 2.3% 323
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.6 6.7
## p_loo 9.0 1.7
## looic 125.2 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1031
## (0.5, 0.7] (ok) 5 11.4% 935
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# PSIS-LOO comparison: extensions adding logic-reuse indicators.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("reused_logic_constructor", beta_prior),
own_quality.with("reused_logic_validation", beta_prior),
own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior)
)
loo_result[2]
## $diffs
## elpd_diff
## own_quality.with("reused_logic_validation", beta_prior) 0.0
## own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior) -0.7
## own_quality.with("reused_logic_constructor", beta_prior) -1.7
## own_quality.with() -2.2
## se_diff
## own_quality.with("reused_logic_validation", beta_prior) 0.0
## own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior) 0.3
## own_quality.with("reused_logic_constructor", beta_prior) 1.1
## own_quality.with() 1.5
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 901
## (0.5, 0.7] (ok) 7 15.9% 1126
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("reused_logic_constructor", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.8 6.6
## p_loo 9.1 1.7
## looic 123.5 13.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 40 90.9% 1218
## (0.5, 0.7] (ok) 3 6.8% 508
## (0.7, 1] (bad) 1 2.3% 140
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("reused_logic_validation", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.1 6.1
## p_loo 8.4 1.5
## looic 120.2 12.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1180
## (0.5, 0.7] (ok) 6 13.6% 1472
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.7 6.2
## p_loo 9.3 1.6
## looic 121.5 12.3
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 40 90.9% 1329
## (0.5, 0.7] (ok) 4 9.1% 584
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# PSIS-LOO comparison: extensions adding equals/hashCode implementation
# indicators.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("equals.exists", beta_prior),
own_quality.with("hashcode.exists", beta_prior),
# NOTE(review): "hashcode.state" differs from the "hashcode.exists"
# predictor used just above — confirm this is intentional, not a typo.
own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)
)
loo_result[2]
## $diffs
## elpd_diff
## own_quality.with() 0.0
## own_quality.with("equals.exists", beta_prior) -0.6
## own_quality.with("hashcode.exists", beta_prior) -0.7
## own_quality.with(c("hashcode.state", "equals.exists"), beta_prior) -0.7
## se_diff
## own_quality.with() 0.0
## own_quality.with("equals.exists", beta_prior) 0.1
## own_quality.with("hashcode.exists", beta_prior) 0.2
## own_quality.with(c("hashcode.state", "equals.exists"), beta_prior) 1.1
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 901
## (0.5, 0.7] (ok) 7 15.9% 1126
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("equals.exists", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.9 6.9
## p_loo 9.0 1.7
## looic 125.7 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 694
## (0.5, 0.7] (ok) 3 6.8% 1152
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("hashcode.exists", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.9 6.9
## p_loo 9.0 1.8
## looic 125.8 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 949
## (0.5, 0.7] (ok) 5 11.4% 488
## (0.7, 1] (bad) 1 2.3% 231
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.9 6.8
## p_loo 10.1 1.9
## looic 125.8 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1158
## (0.5, 0.7] (ok) 6 13.6% 591
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# PSIS-LOO comparison: extensions adding SonarQube issue count and
# documentation measures.
# NOTE(review): unlike every earlier extension call, no beta_prior is
# passed here; since the base "b" prior is commented out, these models
# presumably fall back on a package default — confirm this is intended.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("sonarqube_issues.s"),
own_quality.with("documentation")
)
loo_result[2]
## $diffs
## elpd_diff se_diff
## own_quality.with() 0.0 0.0
## own_quality.with("sonarqube_issues.s") -1.7 1.2
## own_quality.with("documentation") -2.4 1.3
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 901
## (0.5, 0.7] (ok) 7 15.9% 1126
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("sonarqube_issues.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.9 7.3
## p_loo 10.3 2.4
## looic 127.9 14.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 290
## (0.5, 0.7] (ok) 5 11.4% 437
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("documentation")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.6 7.3
## p_loo 11.3 2.4
## looic 129.3 14.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 698
## (0.5, 0.7] (ok) 6 13.6% 251
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
We create a new base model based on what we learned from extending the previous model with different quality measurements. We take care to not include redundant quality indicators.
# Second base model: includes the non-redundant quality indicators
# selected from the LOO comparisons above as population-level predictors,
# plus the per-participant varying intercept.
# NOTE(review): high_debt_version is not in the base formula even though
# the text earlier names it as the effect of interest — presumably it is
# added through the extendable_model mechanism; confirm.
own_quality1.with <- extendable_model(
base_name = "own_quality1",
base_formula = "quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session)",
base_priors = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = data.frame(d.both_completed),
base_control = list(adapt_delta = 0.95)
)
# PSIS-LOO comparison: screen demographic and background variables as
# extensions of the new base model, one candidate predictor at a time.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
own_quality1.with(),
# New model(s)
own_quality1.with("work_domain"),
own_quality1.with("work_experience_programming.s"),
own_quality1.with("work_experience_java.s"),
own_quality1.with("education_field"),
own_quality1.with("mo(education_level)", edlvl_prior),
own_quality1.with("workplace_peer_review"),
own_quality1.with("workplace_td_tracking"),
own_quality1.with("workplace_pair_programming"),
own_quality1.with("workplace_coding_standards"),
own_quality1.with("scenario"),
own_quality1.with("group")
)
loo_result[2]
## $diffs
## elpd_diff se_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0 0.0
## own_quality.with() -1.0 3.0
## own_quality1.with("education_field") -2.7 2.8
## own_quality1.with() -2.8 2.7
## own_quality1.with("workplace_peer_review") -2.9 2.4
## own_quality1.with("workplace_coding_standards") -3.4 2.5
## own_quality1.with("group") -3.4 2.2
## own_quality1.with("workplace_td_tracking") -3.5 2.3
## own_quality1.with("scenario") -3.5 2.6
## own_quality1.with("workplace_pair_programming") -3.6 2.7
## own_quality1.with("work_experience_programming.s") -4.1 2.0
## own_quality1.with("work_domain") -4.2 2.7
## own_quality1.with("work_experience_java.s") -4.2 2.2
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 901
## (0.5, 0.7] (ok) 7 15.9% 1126
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.0 6.7
## p_loo 13.3 2.6
## looic 128.0 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1010
## (0.5, 0.7] (ok) 5 11.4% 409
## (0.7, 1] (bad) 1 2.3% 123
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -65.4 7.2
## p_loo 17.0 3.3
## looic 130.8 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 778
## (0.5, 0.7] (ok) 5 11.4% 157
## (0.7, 1] (bad) 1 2.3% 106
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -65.3 7.0
## p_loo 15.2 3.1
## looic 130.5 14.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 34 77.3% 487
## (0.5, 0.7] (ok) 10 22.7% 288
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -65.4 6.9
## p_loo 15.1 3.0
## looic 130.9 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 768
## (0.5, 0.7] (ok) 7 15.9% 251
## (0.7, 1] (bad) 1 2.3% 303
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.9 6.7
## p_loo 13.9 2.5
## looic 127.8 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 624
## (0.5, 0.7] (ok) 4 9.1% 1059
## (0.7, 1] (bad) 2 4.5% 198
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.2 7.2
## p_loo 13.0 2.6
## looic 122.4 14.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1199
## (0.5, 0.7] (ok) 4 9.1% 352
## (0.7, 1] (bad) 1 2.3% 218
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.1 6.8
## p_loo 13.7 2.5
## looic 128.3 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 665
## (0.5, 0.7] (ok) 5 11.4% 230
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.7 6.8
## p_loo 14.2 2.6
## looic 129.4 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 730
## (0.5, 0.7] (ok) 7 15.9% 164
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.8 6.8
## p_loo 14.2 2.6
## looic 129.7 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 774
## (0.5, 0.7] (ok) 5 11.4% 337
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.6 6.7
## p_loo 14.1 2.6
## looic 129.2 13.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 571
## (0.5, 0.7] (ok) 7 15.9% 307
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.7 6.6
## p_loo 14.1 2.5
## looic 129.4 13.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 569
## (0.5, 0.7] (ok) 6 13.6% 343
## (0.7, 1] (bad) 1 2.3% 3531
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.6 7.0
## p_loo 14.7 2.7
## looic 129.3 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 743
## (0.5, 0.7] (ok) 8 18.2% 404
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# PSIS-LOO comparison: pairwise combinations of the best-performing
# single background predictors from the previous screening.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
own_quality1.with(),
own_quality1.with("mo(education_level)", edlvl_prior),
own_quality1.with("education_field"),
own_quality1.with("workplace_peer_review"),
# New model(s)
own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
own_quality1.with(c("education_field", "workplace_peer_review"))
)
loo_result[2]
## $diffs
## elpd_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) -0.3
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) -0.6
## own_quality.with() -1.0
## own_quality1.with("education_field") -2.7
## own_quality1.with() -2.8
## own_quality1.with("workplace_peer_review") -2.9
## own_quality1.with(c("education_field", "workplace_peer_review")) -3.1
## se_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) 0.3
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) 0.5
## own_quality.with() 3.0
## own_quality1.with("education_field") 2.8
## own_quality1.with() 2.7
## own_quality1.with("workplace_peer_review") 2.4
## own_quality1.with(c("education_field", "workplace_peer_review")) 2.6
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 901
## (0.5, 0.7] (ok) 7 15.9% 1126
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.0 6.7
## p_loo 13.3 2.6
## looic 128.0 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1010
## (0.5, 0.7] (ok) 5 11.4% 409
## (0.7, 1] (bad) 1 2.3% 123
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.2 7.2
## p_loo 13.0 2.6
## looic 122.4 14.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1199
## (0.5, 0.7] (ok) 4 9.1% 352
## (0.7, 1] (bad) 1 2.3% 218
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.9 6.7
## p_loo 13.9 2.5
## looic 127.8 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 624
## (0.5, 0.7] (ok) 4 9.1% 1059
## (0.7, 1] (bad) 2 4.5% 198
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.1 6.8
## p_loo 13.7 2.5
## looic 128.3 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 665
## (0.5, 0.7] (ok) 5 11.4% 230
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.8 7.2
## p_loo 14.0 2.7
## looic 123.6 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 40 90.9% 564
## (0.5, 0.7] (ok) 3 6.8% 952
## (0.7, 1] (bad) 1 2.3% 306
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.5 7.2
## p_loo 13.4 2.6
## looic 123.0 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 566
## (0.5, 0.7] (ok) 2 4.5% 339
## (0.7, 1] (bad) 1 2.3% 352
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("education_field", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.3 6.7
## p_loo 14.4 2.5
## looic 128.6 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 806
## (0.5, 0.7] (ok) 4 9.1% 1396
## (0.7, 1] (bad) 1 2.3% 181
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
# PSIS-LOO comparison: all three best background predictors combined,
# against the benchmarks retained so far.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
own_quality1.with(),
own_quality1.with("mo(education_level)", edlvl_prior),
own_quality1.with("education_field"),
own_quality1.with("workplace_peer_review"),
own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
# New model(s)
own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior)
)
loo_result[2]
## $diffs
## elpd_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) -0.3
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) -0.6
## own_quality.with() -1.0
## own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior) -1.4
## own_quality1.with("education_field") -2.7
## own_quality1.with() -2.8
## own_quality1.with("workplace_peer_review") -2.9
## se_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) 0.3
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) 0.5
## own_quality.with() 3.0
## own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior) 0.5
## own_quality1.with("education_field") 2.8
## own_quality1.with() 2.7
## own_quality1.with("workplace_peer_review") 2.4
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 901
## (0.5, 0.7] (ok) 7 15.9% 1126
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.0 6.7
## p_loo 13.3 2.6
## looic 128.0 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1010
## (0.5, 0.7] (ok) 5 11.4% 409
## (0.7, 1] (bad) 1 2.3% 123
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.2 7.2
## p_loo 13.0 2.6
## looic 122.4 14.5
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1199
## (0.5, 0.7] (ok) 4 9.1% 352
## (0.7, 1] (bad) 1 2.3% 218
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.9 6.7
## p_loo 13.9 2.5
## looic 127.8 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 624
## (0.5, 0.7] (ok) 4 9.1% 1059
## (0.7, 1] (bad) 2 4.5% 198
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.1 6.8
## p_loo 13.7 2.5
## looic 128.3 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 665
## (0.5, 0.7] (ok) 5 11.4% 230
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.8 7.2
## p_loo 14.0 2.7
## looic 123.6 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 40 90.9% 564
## (0.5, 0.7] (ok) 3 6.8% 952
## (0.7, 1] (bad) 1 2.3% 306
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.5 7.2
## p_loo 13.4 2.6
## looic 123.0 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 566
## (0.5, 0.7] (ok) 2 4.5% 339
## (0.7, 1] (bad) 1 2.3% 352
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.6 7.3
## p_loo 14.8 2.8
## looic 125.2 14.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1057
## (0.5, 0.7] (ok) 4 9.1% 182
## (0.7, 1] (bad) 1 2.3% 330
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
# Candidate baseline model: cumulative (ordinal logit) regression of the
# self-assessed quality rating on the seven manipulated quality indicators,
# with a varying intercept per participant (session).
# NOTE: the formula is a single string and file_refit = "on_change", so any
# textual change to it would invalidate the cached fit and trigger a refit.
own_quality0 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session)",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),  # thresholds between rating levels
prior(normal(0, 1), class = "b"),            # slopes: skeptical of large effects
prior(exponential(1), class = "sd")          # between-participant variation
),
family = cumulative(),  # ordinal likert-scale outcome
data = as.data.frame(d.both_completed),
file = "fits/own_quality0",          # cache the fit on disk
file_refit = "on_change",            # refit only when formula/priors/data change
control = list(adapt_delta = 0.95),  # tighter sampler to avoid divergences
seed = 20210421
)
summary(own_quality0)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.54 0.41 0.02 1.50 1.00 1377 1852
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -4.12 1.20 -6.52 -1.86 1.00 4382
## Intercept[2] -1.61 1.01 -3.55 0.44 1.00 5285
## Intercept[3] 1.10 1.03 -0.85 3.18 1.00 4168
## Intercept[4] 1.88 1.06 -0.08 4.08 1.00 4500
## Intercept[5] 2.85 1.15 0.71 5.23 1.00 4646
## var_names_copied_good.ratio 0.18 0.68 -1.15 1.48 1.00 5869
## var_names_new_good.ratio -0.20 0.76 -1.68 1.31 1.00 5669
## reused_logic_validationfalse -1.26 0.67 -2.57 0.05 1.00 4748
## equals.existsFALSE -0.11 0.58 -1.22 1.07 1.00 4360
## sonarqube_issues.s 0.15 0.32 -0.51 0.76 1.00 5294
## documentationIncorrect -0.08 0.70 -1.42 1.25 1.00 4547
## documentationNone -0.31 0.67 -1.62 0.99 1.00 4767
## Tail_ESS
## Intercept[1] 2579
## Intercept[2] 3155
## Intercept[3] 2575
## Intercept[4] 2813
## Intercept[5] 2671
## var_names_copied_good.ratio 3184
## var_names_new_good.ratio 2806
## reused_logic_validationfalse 3010
## equals.existsFALSE 1868
## sonarqube_issues.s 3087
## documentationIncorrect 2868
## documentationNone 3423
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(own_quality0)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.004665587 0.5271372 -1.1449885 1.1827776
## 6033d90a5af2c702367b3a96 0.023284686 0.5501864 -1.1246254 1.2622077
## 6034fc165af2c702367b3a98 0.227026451 0.6240559 -0.7973700 1.8220845
## 603500725af2c702367b3a99 0.200320329 0.5833540 -0.7840352 1.6579879
## 603f97625af2c702367b3a9d 0.182251311 0.5817026 -0.8139791 1.6569444
## 603fd5d95af2c702367b3a9e 0.215486492 0.5698347 -0.7538640 1.5917187
## 60409b7b5af2c702367b3a9f -0.277551611 0.6125458 -1.8649792 0.6653697
## 604b82b5a7718fbed181b336 -0.125033006 0.5701509 -1.5287796 0.9794940
## 6050c1bf856f36729d2e5218 -0.364047446 0.6917797 -2.1648627 0.6225998
## 6050e1e7856f36729d2e5219 0.288414457 0.6607054 -0.7358960 1.9325283
## 6055fdc6856f36729d2e521b -0.086786709 0.5699652 -1.4577966 1.0608163
## 60589862856f36729d2e521f 0.016301411 0.6324473 -1.3462922 1.3726167
## 605afa3a856f36729d2e5222 0.113368643 0.5679028 -0.9928262 1.4190855
## 605c8bc6856f36729d2e5223 0.123040831 0.5646846 -0.9752734 1.4696968
## 605f3f2d856f36729d2e5224 -0.261539668 0.6369305 -1.9414778 0.7675169
## 605f46c3856f36729d2e5225 -0.421105400 0.7302169 -2.3579902 0.5425406
## 60605337856f36729d2e5226 -0.035029702 0.5678189 -1.2934346 1.1889720
## 60609ae6856f36729d2e5228 -0.107957141 0.5530194 -1.4493475 0.9717934
## 6061ce91856f36729d2e522e 0.255057801 0.6157792 -0.7558465 1.8528779
## 6061f106856f36729d2e5231 0.037582109 0.5418064 -1.0923383 1.2626361
## 6068ea9f856f36729d2e523e 0.208912425 0.5985266 -0.8457582 1.7447481
## 6075ab05856f36729d2e5247 -0.444331583 0.7311070 -2.3089178 0.5186057
plot(own_quality0, ask = FALSE)
pp_check(own_quality0, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
We select the best performing model with one variable.
# Candidate model with one extra predictor: adds a monotonic effect of
# education_level on top of the baseline predictors.
# Code kept byte-identical: file_refit = "on_change" means any textual change
# to the formula string would trigger a refit.
own_quality1 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session) +
mo(education_level)",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd"),
prior(dirichlet(2), class = "simo", coef = "moeducation_level1")  # simplex prior for the monotonic steps
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/own_quality1",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality1)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.40 0.34 0.01 1.25 1.00 2245 2330
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -5.53 1.33 -8.19 -2.96 1.00 4343
## Intercept[2] -2.98 1.18 -5.34 -0.73 1.00 4720
## Intercept[3] 0.06 1.11 -2.17 2.19 1.00 5300
## Intercept[4] 0.92 1.13 -1.27 3.10 1.00 5847
## Intercept[5] 1.95 1.19 -0.34 4.31 1.00 5923
## var_names_copied_good.ratio 0.29 0.68 -1.03 1.60 1.00 5342
## var_names_new_good.ratio -0.20 0.73 -1.62 1.22 1.00 6497
## reused_logic_validationfalse -1.14 0.69 -2.48 0.20 1.00 5877
## equals.existsFALSE 0.00 0.59 -1.13 1.18 1.00 7504
## sonarqube_issues.s 0.33 0.33 -0.34 0.97 1.00 6788
## documentationIncorrect -0.20 0.70 -1.60 1.18 1.00 5947
## documentationNone -0.35 0.68 -1.65 0.95 1.00 5406
## moeducation_level -0.62 0.24 -1.10 -0.14 1.00 4044
## Tail_ESS
## Intercept[1] 3011
## Intercept[2] 3165
## Intercept[3] 3294
## Intercept[4] 3301
## Intercept[5] 3663
## var_names_copied_good.ratio 3350
## var_names_new_good.ratio 3030
## reused_logic_validationfalse 2850
## equals.existsFALSE 3325
## sonarqube_issues.s 3157
## documentationIncorrect 2731
## documentationNone 3139
## moeducation_level 3007
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1] 0.21 0.12 0.04 0.49 1.00 7442
## moeducation_level1[2] 0.29 0.15 0.05 0.61 1.00 6584
## moeducation_level1[3] 0.24 0.13 0.04 0.54 1.00 7079
## moeducation_level1[4] 0.26 0.14 0.05 0.56 1.00 6149
## Tail_ESS
## moeducation_level1[1] 2541
## moeducation_level1[2] 2675
## moeducation_level1[3] 2917
## moeducation_level1[4] 2774
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(own_quality1)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.060421418 0.4474515 -0.8219355 1.1664143
## 6033d90a5af2c702367b3a96 -0.070607334 0.4486236 -1.1144652 0.8240244
## 6034fc165af2c702367b3a98 0.171986384 0.5122651 -0.6434738 1.5118743
## 603500725af2c702367b3a99 0.051399096 0.4717215 -0.9177683 1.1503399
## 603f97625af2c702367b3a9d 0.149034948 0.5164489 -0.7244956 1.4400761
## 603fd5d95af2c702367b3a9e 0.063851620 0.4693014 -0.8726579 1.1986143
## 60409b7b5af2c702367b3a9f -0.166169658 0.5097604 -1.4964202 0.6537067
## 604b82b5a7718fbed181b336 0.006109449 0.4636015 -1.0226025 1.0076475
## 6050c1bf856f36729d2e5218 -0.174832601 0.5141417 -1.4825647 0.6404709
## 6050e1e7856f36729d2e5219 0.128640507 0.4793011 -0.7086915 1.3221355
## 6055fdc6856f36729d2e521b -0.084090282 0.4906364 -1.3132208 0.8137954
## 60589862856f36729d2e521f 0.035911177 0.4906062 -0.9472696 1.1771830
## 605afa3a856f36729d2e5222 0.123931386 0.4821368 -0.7407700 1.3444923
## 605c8bc6856f36729d2e5223 0.200810030 0.5199002 -0.5847659 1.5845660
## 605f3f2d856f36729d2e5224 -0.136639502 0.4852726 -1.3773358 0.6892590
## 605f46c3856f36729d2e5225 -0.153429698 0.5083398 -1.5200230 0.6670267
## 60605337856f36729d2e5226 0.027154425 0.4910657 -0.9848317 1.1233308
## 60609ae6856f36729d2e5228 -0.071853728 0.4490669 -1.1682429 0.7967916
## 6061ce91856f36729d2e522e 0.113331195 0.4837503 -0.7028130 1.3565083
## 6061f106856f36729d2e5231 -0.053457133 0.4563305 -1.1416028 0.8943779
## 6068ea9f856f36729d2e523e 0.014520346 0.4376197 -0.8995343 1.0167069
## 6075ab05856f36729d2e5247 -0.189252397 0.5080556 -1.5618180 0.5871634
plot(own_quality1, ask = FALSE)
pp_check(own_quality1, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
We select the best performing model with two variables.
# Candidate model with two extra predictors: mo(education_level) and
# education_field added to the baseline predictors.
# Code kept byte-identical: file_refit = "on_change" means any textual change
# to the formula string would trigger a refit.
own_quality2 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session) +
mo(education_level) +
education_field",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd"),
prior(dirichlet(2), class = "simo", coef = "moeducation_level1")  # simplex prior for the monotonic steps
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/own_quality2",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality2)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level) + education_field
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.43 0.34 0.01 1.25 1.00 2324 2224
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -5.40 1.42 -8.30 -2.68 1.00
## Intercept[2] -2.82 1.26 -5.35 -0.35 1.00
## Intercept[3] 0.26 1.22 -2.16 2.62 1.00
## Intercept[4] 1.11 1.25 -1.29 3.56 1.00
## Intercept[5] 2.15 1.33 -0.41 4.88 1.00
## var_names_copied_good.ratio 0.29 0.68 -1.11 1.63 1.00
## var_names_new_good.ratio -0.14 0.76 -1.61 1.42 1.00
## reused_logic_validationfalse -1.16 0.67 -2.48 0.11 1.00
## equals.existsFALSE 0.02 0.60 -1.13 1.18 1.00
## sonarqube_issues.s 0.33 0.33 -0.34 0.93 1.00
## documentationIncorrect -0.23 0.75 -1.76 1.24 1.00
## documentationNone -0.32 0.67 -1.62 0.99 1.00
## education_fieldInteractionDesign -0.55 0.83 -2.19 1.07 1.00
## education_fieldNone 0.05 0.86 -1.66 1.68 1.00
## education_fieldSoftwareEngineering 0.18 0.58 -0.93 1.29 1.00
## moeducation_level -0.60 0.25 -1.11 -0.12 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5238 3056
## Intercept[2] 5781 3357
## Intercept[3] 6526 3252
## Intercept[4] 6526 3251
## Intercept[5] 6843 3411
## var_names_copied_good.ratio 5257 2958
## var_names_new_good.ratio 5628 2933
## reused_logic_validationfalse 6096 3450
## equals.existsFALSE 6505 3089
## sonarqube_issues.s 5480 2844
## documentationIncorrect 5800 2840
## documentationNone 6454 2924
## education_fieldInteractionDesign 6077 2829
## education_fieldNone 6250 3102
## education_fieldSoftwareEngineering 7173 3918
## moeducation_level 4320 3635
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1] 0.22 0.13 0.03 0.51 1.00 5329
## moeducation_level1[2] 0.28 0.15 0.05 0.61 1.00 6568
## moeducation_level1[3] 0.24 0.13 0.04 0.54 1.00 6008
## moeducation_level1[4] 0.26 0.14 0.04 0.55 1.00 5909
## Tail_ESS
## moeducation_level1[1] 2196
## moeducation_level1[2] 3083
## moeducation_level1[3] 3158
## moeducation_level1[4] 2610
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(own_quality2)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.07257511 0.4853450 -0.8958344 1.2060655
## 6033d90a5af2c702367b3a96 -0.06719129 0.4655512 -1.1743671 0.8630083
## 6034fc165af2c702367b3a98 0.17905977 0.5349648 -0.6912089 1.5669707
## 603500725af2c702367b3a99 0.06251738 0.4870600 -0.8967308 1.2174737
## 603f97625af2c702367b3a9d 0.16271186 0.5225694 -0.7024703 1.5079167
## 603fd5d95af2c702367b3a9e 0.06217807 0.4687142 -0.8778565 1.1660744
## 60409b7b5af2c702367b3a9f -0.19689467 0.5279622 -1.5595090 0.6749143
## 604b82b5a7718fbed181b336 -0.01780412 0.4656230 -1.0741140 1.0102329
## 6050c1bf856f36729d2e5218 -0.20916246 0.5229909 -1.5947023 0.5935431
## 6050e1e7856f36729d2e5219 0.14039057 0.5056946 -0.7577146 1.4154486
## 6055fdc6856f36729d2e521b -0.07063987 0.5026330 -1.2795156 0.9609800
## 60589862856f36729d2e521f 0.03437286 0.5146656 -1.0082980 1.2447815
## 605afa3a856f36729d2e5222 0.16173501 0.5132568 -0.7044857 1.4873283
## 605c8bc6856f36729d2e5223 0.20520358 0.5392337 -0.6277315 1.6514052
## 605f3f2d856f36729d2e5224 -0.14931287 0.5247428 -1.5037436 0.7438351
## 605f46c3856f36729d2e5225 -0.19596157 0.5299663 -1.5836976 0.6472067
## 60605337856f36729d2e5226 0.02973059 0.4965926 -0.9911370 1.1770632
## 60609ae6856f36729d2e5228 -0.08101247 0.4916873 -1.2623611 0.8764413
## 6061ce91856f36729d2e522e 0.12577359 0.5007508 -0.7203002 1.3905577
## 6061f106856f36729d2e5231 -0.04526178 0.4834000 -1.1826973 0.9366807
## 6068ea9f856f36729d2e523e 0.02953905 0.4897309 -0.9708131 1.1652480
## 6075ab05856f36729d2e5247 -0.13579351 0.5219401 -1.4493301 0.7814682
plot(own_quality2, ask = FALSE)
pp_check(own_quality2, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
We select the best performing model with three variables.
# Candidate model with three extra predictors: mo(education_level),
# education_field and workplace_peer_review added to the baseline predictors.
# Code kept byte-identical: file_refit = "on_change" means any textual change
# to the formula string would trigger a refit.
own_quality3 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session) +
mo(education_level) +
education_field +
workplace_peer_review",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd"),
prior(dirichlet(2), class = "simo", coef = "moeducation_level1")  # simplex prior for the monotonic steps
),
family = cumulative(),
data = as.data.frame(d.both_completed),
file = "fits/own_quality3",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality3)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level) + education_field + workplace_peer_review
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.44 0.35 0.02 1.30 1.00 2245 1980
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -5.39 1.42 -8.25 -2.72 1.00
## Intercept[2] -2.82 1.29 -5.38 -0.40 1.00
## Intercept[3] 0.26 1.27 -2.31 2.74 1.00
## Intercept[4] 1.12 1.29 -1.45 3.73 1.00
## Intercept[5] 2.15 1.37 -0.53 4.96 1.00
## var_names_copied_good.ratio 0.29 0.68 -1.03 1.66 1.00
## var_names_new_good.ratio -0.14 0.76 -1.64 1.35 1.00
## reused_logic_validationfalse -1.14 0.67 -2.46 0.17 1.00
## equals.existsFALSE 0.03 0.59 -1.09 1.20 1.00
## sonarqube_issues.s 0.33 0.34 -0.35 0.98 1.00
## documentationIncorrect -0.22 0.72 -1.64 1.21 1.00
## documentationNone -0.30 0.68 -1.65 1.06 1.00
## education_fieldInteractionDesign -0.55 0.84 -2.20 1.08 1.00
## education_fieldNone 0.06 0.86 -1.65 1.75 1.00
## education_fieldSoftwareEngineering 0.18 0.61 -0.98 1.39 1.00
## workplace_peer_reviewfalse -0.07 0.59 -1.24 1.07 1.00
## moeducation_level -0.60 0.26 -1.12 -0.10 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5079 2992
## Intercept[2] 6292 3235
## Intercept[3] 6699 3323
## Intercept[4] 6665 3418
## Intercept[5] 7096 3520
## var_names_copied_good.ratio 6212 3517
## var_names_new_good.ratio 6631 3251
## reused_logic_validationfalse 6912 3165
## equals.existsFALSE 7206 3488
## sonarqube_issues.s 6610 3267
## documentationIncorrect 6579 3475
## documentationNone 6114 2692
## education_fieldInteractionDesign 7672 3349
## education_fieldNone 7927 3290
## education_fieldSoftwareEngineering 7484 3490
## workplace_peer_reviewfalse 6657 2938
## moeducation_level 4353 3136
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1] 0.22 0.12 0.03 0.50 1.00 7725
## moeducation_level1[2] 0.28 0.15 0.05 0.61 1.00 7058
## moeducation_level1[3] 0.24 0.14 0.03 0.55 1.00 7342
## moeducation_level1[4] 0.26 0.14 0.04 0.55 1.00 6524
## Tail_ESS
## moeducation_level1[1] 2572
## moeducation_level1[2] 2974
## moeducation_level1[3] 2944
## moeducation_level1[4] 3096
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(own_quality3)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.07292083 0.4786193 -0.8662791 1.1845024
## 6033d90a5af2c702367b3a96 -0.07020497 0.4741531 -1.2300104 0.8881639
## 6034fc165af2c702367b3a98 0.18877404 0.5511362 -0.7364588 1.5693337
## 603500725af2c702367b3a99 0.07164407 0.5091508 -0.9227960 1.2644073
## 603f97625af2c702367b3a9d 0.16239819 0.5276444 -0.7347354 1.5347753
## 603fd5d95af2c702367b3a9e 0.08224825 0.4784688 -0.8329144 1.1904050
## 60409b7b5af2c702367b3a9f -0.19021499 0.5249898 -1.5781463 0.6258941
## 604b82b5a7718fbed181b336 -0.01420662 0.4912693 -1.1131575 1.1353241
## 6050c1bf856f36729d2e5218 -0.21692246 0.5396178 -1.6765630 0.5771092
## 6050e1e7856f36729d2e5219 0.15955201 0.5396768 -0.7411380 1.5334869
## 6055fdc6856f36729d2e521b -0.08413992 0.4985885 -1.3187810 0.8729798
## 60589862856f36729d2e521f 0.03754052 0.5382152 -1.0320576 1.3512681
## 605afa3a856f36729d2e5222 0.16252356 0.5305735 -0.7440951 1.5147409
## 605c8bc6856f36729d2e5223 0.20906027 0.5536733 -0.6712950 1.6124915
## 605f3f2d856f36729d2e5224 -0.16107686 0.5186055 -1.5039316 0.7075811
## 605f46c3856f36729d2e5225 -0.20919387 0.5512730 -1.6114661 0.6417108
## 60605337856f36729d2e5226 0.02487511 0.5096511 -1.0500033 1.1841955
## 60609ae6856f36729d2e5228 -0.07561048 0.4857031 -1.2397214 0.8791959
## 6061ce91856f36729d2e522e 0.11888767 0.5150776 -0.7873797 1.4099026
## 6061f106856f36729d2e5231 -0.06716551 0.4942525 -1.2691431 0.9238359
## 6068ea9f856f36729d2e523e 0.03607078 0.5193162 -1.0147701 1.2208109
## 6075ab05856f36729d2e5247 -0.15907814 0.5226057 -1.5145098 0.7168122
plot(own_quality3, ask = FALSE)
pp_check(own_quality3, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
All candidate models look good, and none is significantly better than the
others. We will proceed with the model containing work experience, as it
would otherwise be added in the next step: own_quality0
Some participants completed only one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can and will therefore try to fit the model with the complete dataset.
# Final model: same specification as own_quality0 but fitted on the complete
# dataset (d.completed), which also includes participants who finished only
# one scenario.
# Code kept byte-identical: file_refit = "on_change" means any textual change
# to the formula string would trigger a refit.
own_quality0.all <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session)",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.completed),  # full dataset, not just both-completed
file = "fits/own_quality0.all",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality0.all)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.52 0.40 0.01 1.44 1.00 1369 1807
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -4.79 1.24 -7.30 -2.46 1.00 3860
## Intercept[2] -3.50 1.07 -5.65 -1.42 1.00 5106
## Intercept[3] -1.43 0.98 -3.40 0.51 1.00 5524
## Intercept[4] 1.46 1.00 -0.43 3.46 1.00 4704
## Intercept[5] 2.20 1.04 0.20 4.27 1.00 5141
## Intercept[6] 3.15 1.15 0.91 5.43 1.00 5281
## var_names_copied_good.ratio 0.19 0.67 -1.13 1.48 1.00 5208
## var_names_new_good.ratio -0.03 0.75 -1.51 1.38 1.00 5992
## reused_logic_validationfalse -1.30 0.65 -2.55 -0.01 1.00 5323
## equals.existsFALSE -0.15 0.58 -1.29 1.02 1.00 5245
## sonarqube_issues.s 0.08 0.31 -0.57 0.67 1.00 5765
## documentationIncorrect -0.16 0.64 -1.42 1.10 1.00 4830
## documentationNone -0.30 0.60 -1.45 0.94 1.00 5136
## Tail_ESS
## Intercept[1] 2992
## Intercept[2] 3107
## Intercept[3] 2701
## Intercept[4] 3188
## Intercept[5] 3258
## Intercept[6] 2974
## var_names_copied_good.ratio 3367
## var_names_new_good.ratio 3114
## reused_logic_validationfalse 3515
## equals.existsFALSE 2902
## sonarqube_issues.s 3206
## documentationIncorrect 2932
## documentationNone 2934
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 NA NA NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(own_quality0.all)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -0.372408386 0.7841800 -2.4500565 0.6509531
## 6033d69a5af2c702367b3a95 0.040479850 0.5375009 -1.0863194 1.2587577
## 6033d90a5af2c702367b3a96 0.045434348 0.5189697 -1.0625645 1.2324326
## 6034fc165af2c702367b3a98 0.237236539 0.6271860 -0.7108012 1.8404439
## 603500725af2c702367b3a99 0.230201781 0.5779790 -0.7195627 1.6608285
## 603f84f15af2c702367b3a9b 0.016198984 0.5882814 -1.2750833 1.3550994
## 603f97625af2c702367b3a9d 0.212381556 0.5892914 -0.7669954 1.6730726
## 603fd5d95af2c702367b3a9e 0.237127902 0.6008399 -0.7474289 1.7871624
## 60409b7b5af2c702367b3a9f -0.204543679 0.5660094 -1.6241742 0.7465444
## 604b82b5a7718fbed181b336 -0.065847643 0.5548136 -1.3918616 1.0274548
## 604f1239a7718fbed181b33f 0.017712987 0.5674296 -1.2371173 1.2916858
## 6050c1bf856f36729d2e5218 -0.294707800 0.6393358 -1.9273966 0.6900997
## 6050e1e7856f36729d2e5219 0.302869045 0.6535337 -0.6555388 2.0016795
## 6055fdc6856f36729d2e521b -0.057627828 0.5651258 -1.3821086 1.1493950
## 60579f2a856f36729d2e521e -0.230201023 0.6270218 -1.8978505 0.7629862
## 60589862856f36729d2e521f 0.026791893 0.6286982 -1.3312424 1.4598502
## 605a30a7856f36729d2e5221 0.003214293 0.5553910 -1.2410379 1.2123805
## 605afa3a856f36729d2e5222 0.143033731 0.5647349 -0.8942491 1.4892687
## 605c8bc6856f36729d2e5223 0.140888711 0.5391848 -0.8319169 1.4732160
## 605f3f2d856f36729d2e5224 -0.206526332 0.5789037 -1.6890799 0.7750839
## 605f46c3856f36729d2e5225 -0.377421560 0.6629626 -2.1120662 0.5013496
## 60605337856f36729d2e5226 -0.007595143 0.5488059 -1.2171280 1.1694757
## 60609ae6856f36729d2e5228 -0.077252498 0.5197291 -1.2715670 0.9599132
## 6061ce91856f36729d2e522e 0.266236684 0.6018718 -0.6589558 1.8098424
## 6061f106856f36729d2e5231 0.059041868 0.5308058 -1.0556227 1.2695254
## 60672faa856f36729d2e523c 0.018507918 0.5849574 -1.2720438 1.2728496
## 6068ea9f856f36729d2e523e 0.229267136 0.6015340 -0.7346368 1.7698431
## 606db69d856f36729d2e5243 0.010330534 0.5718739 -1.2485315 1.2439217
## 6075ab05856f36729d2e5247 -0.390393987 0.6810910 -2.2353987 0.4875631
plot(own_quality0.all, ask = FALSE)
pp_check(own_quality0.all, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.
This means that our final model, with all data points and experience
predictors, is own_quality0.all
To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected, we will mainly analyze the \(\beta\) parameters.
# Posterior density areas for the beta (slope) parameters of the final model.
# Fix: the original chained a bare scale_y_discrete() before the labelled one;
# ggplot2 discards the first scale (with a replacement message), so only the
# labelled call is kept.
mcmc_areas(own_quality0.all,
pars = c(
"b_var_names_copied_good.ratio",
"b_var_names_new_good.ratio",
"b_reused_logic_validationfalse",
"b_equals.existsFALSE",
"b_sonarqube_issues.s",
"b_documentationIncorrect",
"b_documentationNone"
),
prob = 0.95) +
scale_y_discrete(labels=c(
"Ratio of good copied var names",
"Ratio of good new var names",
"Duplicated validation logic",
"Missing equals implementation",
"Amount of sonarqube issues",
"Incorrect documentation",
"No documentation"
)) +
ggtitle("Beta parameters densities in self assesed quality model", subtitle = "Shaded region marks 95% of the density. Line marks the median")
As we have a lot of effects playing small roles, we will simulate two scenarios: one where the developer, according to us, does well and one where the developer does not do so well, and see if the participants' ratings agree with ours.
# Standardise a raw years-of-programming-experience value using the mean and
# standard deviation of the completed-sessions dataset (z-score).
scale_programming_experience <- function(x) {
  mu <- mean(d.completed$work_experience_programming)
  sigma <- sd(d.completed$work_experience_programming)
  (x - mu) / sigma
}
# Inverse of scale_programming_experience: map a standardised value back to
# raw years of programming experience.
unscale_programming_experience <- function(x) {
  mu <- mean(d.completed$work_experience_programming)
  sigma <- sd(d.completed$work_experience_programming)
  x * sigma + mu
}
# Two hypothetical submissions to contrast via posterior prediction:
# row 1 = a "good" submission (good names, reused validation, equals present,
# few issues, correct docs), row 2 = a "bad" one.
# Fix: data_frame() is deprecated in the tibble package; tibble() is the
# drop-in replacement.
post_settings <- tibble(
var_names_copied_good.ratio = c(0.9, 0.5),
var_names_new_good.ratio = c(0.9, 0.5),
reused_logic_validation = c("true", "false"),
equals.exists = c("TRUE", "FALSE"),
sonarqube_issues.s = c(-1, 1),
documentation = c("Correct", "Incorrect"),
session = NA  # no specific participant: predict for a new session
)
# Draw posterior predictions for both hypothetical submissions and reshape to
# one row per (draw, setting), labelled "Good"/"Bad" via the
# reused_logic_validation value of each setting row.
post <- posterior_predict(own_quality0.all, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%  # draws matrix -> long format
left_join(
rowid_to_column(post_settings, var= "settings_id"),  # attach the scenario settings to each draw
by = "settings_id"
) %>%
mutate(submission = revalue(reused_logic_validation, c("true" = "Good", "false" = "Bad"))) %>%
select(
estimate,
submission
)
# Map the numeric ordinal outcome (1..7) back to the likert labels for
# plotting. Fix: mutate_at() is superseded in dplyr; for a single named
# column a plain mutate() is equivalent and clearer.
post.nice <- post %>% mutate(estimate = revalue(as.ordered(estimate), c(
"1"="Very Bad",
"2"="Bad",
"3"="Somewhat Bad",
"4"="Neutral",
"5"="Somewhat Good",
"6"="Good",
"7"="Very Good"
)))
# Mean predicted rating per submission type, used for vertical reference
# lines in the faceted plot (as.numeric() on the ordered factor yields the
# 1..7 level index).
vline.data <- post.nice %>%
group_by(submission) %>%
summarize(z = mean(as.numeric(estimate)))
sprintf("Estimations for 3 years experience")
## [1] "Estimations for 3 years experience"
# Distribution of predicted ratings for the good vs. bad submission, with a
# vertical line at the mean rating of each group.
# Fix: geom_histogram(stat = "count") is deprecated for discrete data;
# geom_bar() is the geom ggplot2 itself recommends and produces the same plot.
post.nice %>%
ggplot() +
geom_bar(aes(x=estimate), fill= "Light Blue") +
geom_vline(aes(xintercept = z),
vline.data,
col = "Dark Blue",
lwd = 1)+
facet_grid(rows = vars(submission)) +
scale_y_continuous(limits = NULL, breaks = sapply(c(0.1, 0.2, 0.3, 0.4, 0.5), function(x) x*nrow(post.nice) / 2), labels = c("10%","20%","30%","40%", "50%")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
# Per-draw rating difference: bad submission rating minus good submission
# rating. NOTE(review): this pairing relies on the "Bad" and "Good" subsets
# having matching row order (same posterior draw at the same position) after
# filter() — TODO confirm the melt ordering guarantees this.
post.diff <- post %>% filter(submission == "Bad")
post.diff$estimate = post.diff$estimate - filter(post, submission == "Good")$estimate
# Box plot of the per-draw rating difference.
# NOTE(review): quantile_lines, quantile_fun and vline_linetype are not
# geom_boxplot() parameters (they belong to ggridges' ridge geoms); ggplot2
# will warn and ignore them — confirm whether a ridge/interval geom was
# intended here.
post.diff %>%
ggplot(aes(x=estimate)) +
geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
labs(
title = "Submission rating diff",
subtitle = "Difference as: bad submission rating - good submission rating",
x = "Rating difference"
) +
scale_y_continuous(breaks = NULL)
We can then proceed to calculate some likelihoods:
# Count posterior draws where the bad submission was rated higher than the
# good one (negative difference) and vice versa, then form their ratio.
# Fix: compare the estimate column explicitly — the original compared the
# whole data frame against 0, which also includes the non-numeric
# `submission` column in the comparison.
bad_rated_higher <- sum(post.diff$estimate < 0)
good_rated_higher <- sum(post.diff$estimate > 0)
x <- good_rated_higher / bad_rated_higher
x
## [1] 2.24108
Participants were 124% more likely to rate the bad submission as worse than they were to rate the good submission as worse.