Looking at the data

There appears to be a slightly higher rate of negative ratings for the high debt group, and an even smaller increase in positive ratings.

# Prepare the Likert responses for plotting: keep only the treatment
# indicator and the self-rated quality, and translate both columns'
# raw codes into human-readable labels in one pipeline.
likert.data <- d.both_completed %>%
  select(high_debt_version, quality_post_task) %>%
  mutate(
    # Map the -3..3 quality scale onto its verbal anchors.
    quality_post_task = revalue(quality_post_task, c(
      "-3" = "Very Bad",
      "-2" = "Bad",
      "-1" = "Somewhat Bad",
      "0" = "Neutral",
      "1" = "Somewhat Good",
      "2" = "Good",
      "3" = "Very Good"
    )),
    # Rename the treatment levels for nicer facet labels.
    high_debt_version = revalue(high_debt_version, c(
      "true" = "High Debt",
      "false" = "Low Debt"
    ))
  )

# Bar chart of rating counts, one facet row per debt treatment.
ggplot(likert.data, aes(x = quality_post_task)) +
  geom_bar(fill = "Light Blue") +
  facet_grid(rows = vars(high_debt_version)) +
  scale_y_continuous(limits = NULL, breaks = c(2, 4, 6, 8), labels = c("2", "4", "6", "8")) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank())

Initial model

As the data is collected from a Likert scale we will use a cumulative family, indicating that each level on the scale is an incremental step. This model is also able to fit the data well.

We include high_debt_version as a predictor in our model as this variable represents the very effect we want to measure. We also include a varying intercept for each individual to prevent the model from learning too much from single participants with extreme measurements.

Selecting priors

We iterate over the model until we have sane priors.

# Base ordinal (cumulative logit) model of self-rated quality with a
# varying intercept per participant session. extendable_model() is a
# project helper returning a closure that refits the base model with
# optional additional predictors.
own_quality.with <- extendable_model(
  base_name = "own_quality",
  base_formula = "quality_post_task ~ 1  + (1 | session)",
    base_priors = c(
    prior(normal(0, 2.5), class = "Intercept"),
    # Slope ("b") prior is supplied per-extension instead of here,
    # since the base formula has no population-level predictors.
    #prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),
  data = d.both_completed,
  # Higher adapt_delta to reduce divergent transitions during sampling.
  base_control = list(adapt_delta = 0.95)
)

Default priors

# Show the default priors brms would assign to this model specification.
prior_summary(own_quality.with(only_priors= TRUE))

Selected priors

# Show the priors actually used, from a fit that samples the prior only
# (no conditioning on data).
prior_summary(own_quality.with(sample_prior = "only"))

Prior predictive check

# Prior predictive check: 200 datasets simulated from the priors alone,
# compared against the observed rating bars.
# 'ndraws' replaces the deprecated 'nsamples' argument in recent brms.
pp_check(own_quality.with(sample_prior = "only"), ndraws = 200, type = "bars")

Beta parameter influence

We choose a beta parameter prior that allows the beta parameter to account for 100% of the effect, but that is skeptical of such strong effects from the beta parameter.

# Simulate draws from the intercept and beta priors to gauge how much of
# the response-scale estimate the beta term alone can account for.
n_sim <- 1000
draw_intercept <- rnorm(n_sim, 0, 2.5)
draw_beta <- rnorm(n_sim, 0, 1)
# Percentage change of the inverse-logit estimate when the beta draw is
# added on top of the intercept draw (0 = no influence).
beta_influence <- (plogis(draw_intercept + draw_beta) / plogis(draw_intercept) * 100) - 100

# Density of the simulated influence, truncated to +/-150% for display.
data.frame(x = beta_influence) %>%
  ggplot(aes(x)) +
  geom_density() +
  xlim(-150, 150) +
  labs(
    title = "Beta parameter prior influence",
    x = "Estimate with beta as % of estimate without beta",
    y = "Density"
  )

Model fit

We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.

Posterior predictive check

# Posterior predictive check: 200 posterior draws vs observed rating bars.
# 'ndraws' is the current argument name; 'nsamples' is deprecated in brms
# (the original call emitted a deprecation warning here).
pp_check(own_quality.with(), ndraws = 200, type = "bars")

Summary

# Posterior summary: cutpoint intercepts, session-level sd, and
# convergence diagnostics (Rhat, bulk/tail ESS).
summary(own_quality.with())
##  Family: cumulative 
##   Links: mu = logit; disc = identity 
## Formula: quality_post_task ~ 1 + (1 | session) 
##    Data: as.data.frame(data) (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.51      0.38     0.02     1.43 1.00     1589     2260
## 
## Population-Level Effects: 
##              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept[1]    -3.24      0.73    -4.83    -1.95 1.00     3196     2447
## Intercept[2]    -0.84      0.36    -1.62    -0.19 1.00     4063     3480
## Intercept[3]     1.65      0.42     0.89     2.51 1.00     4608     3463
## Intercept[4]     2.38      0.53     1.46     3.54 1.00     5464     3207
## Intercept[5]     3.32      0.73     2.07     4.88 1.00     6403     3543
## 
## Family Specific Parameters: 
##      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc     1.00      0.00     1.00     1.00   NA       NA       NA
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Sampling plots

# Trace and density plots for all parameters (MCMC sampling diagnostics).
plot(own_quality.with(), ask = FALSE)

Model quality extensions

# Default prior for a monotonic (ordered categorical) predictor extension.
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
# Default prior for population-level (beta) coefficients of extensions.
beta_prior <- prior(normal(0, 1), class = "b")

We use loo to check some possible extensions on the model.

Variable names

# PSIS-LOO comparison of the base model against extensions with
# variable-naming quality predictors (each beta gets beta_prior).
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  own_quality.with("var_names_new_good.ratio", beta_prior),
  own_quality.with("var_names_copied_good.ratio", beta_prior),
  own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior)
)

Comparison

# elpd difference table ($diffs) ranking the candidate models.
loo_result[2]
## $diffs
##                                                                                                elpd_diff
## own_quality.with()                                                                              0.0     
## own_quality.with("var_names_copied_good.ratio", beta_prior)                                    -0.2     
## own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"),     beta_prior) -0.4     
## own_quality.with("var_names_new_good.ratio", beta_prior)                                       -0.4     
##                                                                                                se_diff
## own_quality.with()                                                                              0.0   
## own_quality.with("var_names_copied_good.ratio", beta_prior)                                     0.7   
## own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"),     beta_prior)  0.7   
## own_quality.with("var_names_new_good.ratio", beta_prior)                                        0.2

Diagnostics

# Per-model LOO fits ($loos) with Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.2  6.8
## p_loo         8.2  1.6
## looic       124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   901       
##  (0.5, 0.7]   (ok)        7    15.9%   1126      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("var_names_new_good.ratio", beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.6  6.8
## p_loo         8.7  1.7
## looic       125.3 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     40    90.9%   1167      
##  (0.5, 0.7]   (ok)        4     9.1%   380       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("var_names_copied_good.ratio", beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.5  6.8
## p_loo         8.7  1.7
## looic       124.9 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   1266      
##  (0.5, 0.7]   (ok)        5    11.4%   1037      
##    (0.7, 1]   (bad)       1     2.3%   323       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"),     beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.6  6.7
## p_loo         9.0  1.7
## looic       125.2 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   1031      
##  (0.5, 0.7]   (ok)        5    11.4%   935       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Reuse

# PSIS-LOO comparison of the base model against extensions with
# logic-reuse predictors (each beta gets beta_prior).
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  own_quality.with("reused_logic_constructor", beta_prior),
  own_quality.with("reused_logic_validation", beta_prior),
  own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior)
)

Comparison

# elpd difference table ($diffs) ranking the candidate models.
loo_result[2]
## $diffs
##                                                                                            elpd_diff
## own_quality.with("reused_logic_validation", beta_prior)                                     0.0     
## own_quality.with(c("reused_logic_validation", "reused_logic_constructor"),     beta_prior) -0.7     
## own_quality.with("reused_logic_constructor", beta_prior)                                   -1.7     
## own_quality.with()                                                                         -2.2     
##                                                                                            se_diff
## own_quality.with("reused_logic_validation", beta_prior)                                     0.0   
## own_quality.with(c("reused_logic_validation", "reused_logic_constructor"),     beta_prior)  0.3   
## own_quality.with("reused_logic_constructor", beta_prior)                                    1.1   
## own_quality.with()                                                                          1.5

Diagnostics

# Per-model LOO fits ($loos) with Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.2  6.8
## p_loo         8.2  1.6
## looic       124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   901       
##  (0.5, 0.7]   (ok)        7    15.9%   1126      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("reused_logic_constructor", beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.8  6.6
## p_loo         9.1  1.7
## looic       123.5 13.2
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     40    90.9%   1218      
##  (0.5, 0.7]   (ok)        3     6.8%   508       
##    (0.7, 1]   (bad)       1     2.3%   140       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("reused_logic_validation", beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -60.1  6.1
## p_loo         8.4  1.5
## looic       120.2 12.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   1180      
##  (0.5, 0.7]   (ok)        6    13.6%   1472      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with(c("reused_logic_validation", "reused_logic_constructor"),     beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -60.7  6.2
## p_loo         9.3  1.6
## looic       121.5 12.3
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     40    90.9%   1329      
##  (0.5, 0.7]   (ok)        4     9.1%   584       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Utility methods

# PSIS-LOO comparison of the base model against extensions with
# utility-method predictors (each beta gets beta_prior).
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  own_quality.with("equals.exists", beta_prior),
  own_quality.with("hashcode.exists", beta_prior),
  own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)
)

Comparison

# elpd difference table ($diffs) ranking the candidate models.
loo_result[2]
## $diffs
##                                                                    elpd_diff
## own_quality.with()                                                  0.0     
## own_quality.with("equals.exists", beta_prior)                      -0.6     
## own_quality.with("hashcode.exists", beta_prior)                    -0.7     
## own_quality.with(c("hashcode.state", "equals.exists"), beta_prior) -0.7     
##                                                                    se_diff
## own_quality.with()                                                  0.0   
## own_quality.with("equals.exists", beta_prior)                       0.1   
## own_quality.with("hashcode.exists", beta_prior)                     0.2   
## own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)  1.1

Diagnostics

# Per-model LOO fits ($loos) with Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.2  6.8
## p_loo         8.2  1.6
## looic       124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   901       
##  (0.5, 0.7]   (ok)        7    15.9%   1126      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("equals.exists", beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.9  6.9
## p_loo         9.0  1.7
## looic       125.7 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     41    93.2%   694       
##  (0.5, 0.7]   (ok)        3     6.8%   1152      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("hashcode.exists", beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.9  6.9
## p_loo         9.0  1.8
## looic       125.8 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   949       
##  (0.5, 0.7]   (ok)        5    11.4%   488       
##    (0.7, 1]   (bad)       1     2.3%   231       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.9  6.8
## p_loo        10.1  1.9
## looic       125.8 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   1158      
##  (0.5, 0.7]   (ok)        6    13.6%   591       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Other quality attributes

# PSIS-LOO comparison of the base model against other quality attributes.
# NOTE(review): unlike every other extension comparison in this document,
# these calls do not pass beta_prior, so the extension coefficients use
# the default prior — confirm whether this is intentional.
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  own_quality.with("sonarqube_issues.s"),
  own_quality.with("documentation")
)

Comparison

# elpd difference table ($diffs) ranking the candidate models.
loo_result[2]
## $diffs
##                                        elpd_diff se_diff
## own_quality.with()                      0.0       0.0   
## own_quality.with("sonarqube_issues.s") -1.7       1.2   
## own_quality.with("documentation")      -2.4       1.3

Diagnostics

# Per-model LOO fits ($loos) with Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.2  6.8
## p_loo         8.2  1.6
## looic       124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   901       
##  (0.5, 0.7]   (ok)        7    15.9%   1126      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("sonarqube_issues.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -63.9  7.3
## p_loo        10.3  2.4
## looic       127.9 14.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   290       
##  (0.5, 0.7]   (ok)        5    11.4%   437       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality.with("documentation")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.6  7.3
## p_loo        11.3  2.4
## looic       129.3 14.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   698       
##  (0.5, 0.7]   (ok)        6    13.6%   251       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Model predictor extensions

We create a new base model based on what we learned from extending the previous model with different quality measurements. We take care to not include redundant quality indicators.

# Second base model: adds the non-redundant quality indicators suggested
# by the previous LOO comparisons as fixed predictors, still with a
# varying intercept per participant session.
own_quality1.with <- extendable_model(
  base_name = "own_quality1",
  base_formula = "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session)",
    base_priors = c(
    prior(normal(0, 2.5), class = "Intercept"),
    # Slope prior is included here since the base formula now has
    # population-level predictors.
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),
  data = data.frame(d.both_completed),
  # Higher adapt_delta to reduce divergent transitions during sampling.
  base_control = list(adapt_delta = 0.95)
)

One variable

# PSIS-LOO comparison: both base models plus one demographic/process
# predictor at a time (mo() marks education_level as monotonic and
# needs its own simplex prior).
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  own_quality1.with(),
  
  # New model(s)
  own_quality1.with("work_domain"),
  own_quality1.with("work_experience_programming.s"),
  own_quality1.with("work_experience_java.s"),
  own_quality1.with("education_field"),
  own_quality1.with("mo(education_level)", edlvl_prior),
  own_quality1.with("workplace_peer_review"),
  own_quality1.with("workplace_td_tracking"),
  own_quality1.with("workplace_pair_programming"),
  own_quality1.with("workplace_coding_standards"),
  own_quality1.with("scenario"),
  own_quality1.with("group")
)

Comparison

# elpd difference table ($diffs) ranking the candidate models.
loo_result[2]
## $diffs
##                                                       elpd_diff se_diff
## own_quality1.with("mo(education_level)", edlvl_prior)  0.0       0.0   
## own_quality.with()                                    -1.0       3.0   
## own_quality1.with("education_field")                  -2.7       2.8   
## own_quality1.with()                                   -2.8       2.7   
## own_quality1.with("workplace_peer_review")            -2.9       2.4   
## own_quality1.with("workplace_coding_standards")       -3.4       2.5   
## own_quality1.with("group")                            -3.4       2.2   
## own_quality1.with("workplace_td_tracking")            -3.5       2.3   
## own_quality1.with("scenario")                         -3.5       2.6   
## own_quality1.with("workplace_pair_programming")       -3.6       2.7   
## own_quality1.with("work_experience_programming.s")    -4.1       2.0   
## own_quality1.with("work_domain")                      -4.2       2.7   
## own_quality1.with("work_experience_java.s")           -4.2       2.2

Diagnostics

# Per-model LOO fits ($loos) with Pareto k diagnostics.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.2  6.8
## p_loo         8.2  1.6
## looic       124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   901       
##  (0.5, 0.7]   (ok)        7    15.9%   1126      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.0  6.7
## p_loo        13.3  2.6
## looic       128.0 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   1010      
##  (0.5, 0.7]   (ok)        5    11.4%   409       
##    (0.7, 1]   (bad)       1     2.3%   123       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("work_domain")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -65.4  7.2
## p_loo        17.0  3.3
## looic       130.8 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   778       
##  (0.5, 0.7]   (ok)        5    11.4%   157       
##    (0.7, 1]   (bad)       1     2.3%   106       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("work_experience_programming.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -65.3  7.0
## p_loo        15.2  3.1
## looic       130.5 14.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     34    77.3%   487       
##  (0.5, 0.7]   (ok)       10    22.7%   288       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("work_experience_java.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -65.4  6.9
## p_loo        15.1  3.0
## looic       130.9 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     36    81.8%   768       
##  (0.5, 0.7]   (ok)        7    15.9%   251       
##    (0.7, 1]   (bad)       1     2.3%   303       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("education_field")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -63.9  6.7
## p_loo        13.9  2.5
## looic       127.8 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   624       
##  (0.5, 0.7]   (ok)        4     9.1%   1059      
##    (0.7, 1]   (bad)       2     4.5%   198       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.2  7.2
## p_loo        13.0  2.6
## looic       122.4 14.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   1199      
##  (0.5, 0.7]   (ok)        4     9.1%   352       
##    (0.7, 1]   (bad)       1     2.3%   218       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("workplace_peer_review")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.1  6.8
## p_loo        13.7  2.5
## looic       128.3 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   665       
##  (0.5, 0.7]   (ok)        5    11.4%   230       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("workplace_td_tracking")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.7  6.8
## p_loo        14.2  2.6
## looic       129.4 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   730       
##  (0.5, 0.7]   (ok)        7    15.9%   164       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("workplace_pair_programming")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.8  6.8
## p_loo        14.2  2.6
## looic       129.7 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   774       
##  (0.5, 0.7]   (ok)        5    11.4%   337       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("workplace_coding_standards")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.6  6.7
## p_loo        14.1  2.6
## looic       129.2 13.4
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   571       
##  (0.5, 0.7]   (ok)        7    15.9%   307       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("scenario")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.7  6.6
## p_loo        14.1  2.5
## looic       129.4 13.3
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   569       
##  (0.5, 0.7]   (ok)        6    13.6%   343       
##    (0.7, 1]   (bad)       1     2.3%   3531      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("group")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.6  7.0
## p_loo        14.7  2.7
## looic       129.3 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     36    81.8%   743       
##  (0.5, 0.7]   (ok)        8    18.2%   404       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.

Two variables

# PSIS-LOO comparison: pairwise combinations of the best-performing
# single-variable extensions from the previous step.
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  own_quality1.with(),
  
  own_quality1.with("mo(education_level)", edlvl_prior),
  own_quality1.with("education_field"),
  own_quality1.with("workplace_peer_review"),
  
  # New model(s)
  own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
  own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
  own_quality1.with(c("education_field", "workplace_peer_review"))
)

Comparison

# elpd difference table ($diffs) ranking the candidate models.
loo_result[2]
## $diffs
##                                                                                       elpd_diff
## own_quality1.with("mo(education_level)", edlvl_prior)                                  0.0     
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"),     edlvl_prior) -0.3     
## own_quality1.with(c("mo(education_level)", "education_field"),     edlvl_prior)       -0.6     
## own_quality.with()                                                                    -1.0     
## own_quality1.with("education_field")                                                  -2.7     
## own_quality1.with()                                                                   -2.8     
## own_quality1.with("workplace_peer_review")                                            -2.9     
## own_quality1.with(c("education_field", "workplace_peer_review"))                      -3.1     
##                                                                                       se_diff
## own_quality1.with("mo(education_level)", edlvl_prior)                                  0.0   
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"),     edlvl_prior)  0.3   
## own_quality1.with(c("mo(education_level)", "education_field"),     edlvl_prior)        0.5   
## own_quality.with()                                                                     3.0   
## own_quality1.with("education_field")                                                   2.8   
## own_quality1.with()                                                                    2.7   
## own_quality1.with("workplace_peer_review")                                             2.4   
## own_quality1.with(c("education_field", "workplace_peer_review"))                       2.6

Diagnostics

loo_result[1]
## $loos
## $loos$`own_quality.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.2  6.8
## p_loo         8.2  1.6
## looic       124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   901       
##  (0.5, 0.7]   (ok)        7    15.9%   1126      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.0  6.7
## p_loo        13.3  2.6
## looic       128.0 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   1010      
##  (0.5, 0.7]   (ok)        5    11.4%   409       
##    (0.7, 1]   (bad)       1     2.3%   123       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.2  7.2
## p_loo        13.0  2.6
## looic       122.4 14.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   1199      
##  (0.5, 0.7]   (ok)        4     9.1%   352       
##    (0.7, 1]   (bad)       1     2.3%   218       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("education_field")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -63.9  6.7
## p_loo        13.9  2.5
## looic       127.8 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   624       
##  (0.5, 0.7]   (ok)        4     9.1%   1059      
##    (0.7, 1]   (bad)       2     4.5%   198       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("workplace_peer_review")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.1  6.8
## p_loo        13.7  2.5
## looic       128.3 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   665       
##  (0.5, 0.7]   (ok)        5    11.4%   230       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with(c("mo(education_level)", "education_field"),     edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.8  7.2
## p_loo        14.0  2.7
## looic       123.6 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     40    90.9%   564       
##  (0.5, 0.7]   (ok)        3     6.8%   952       
##    (0.7, 1]   (bad)       1     2.3%   306       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with(c("mo(education_level)", "workplace_peer_review"),     edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.5  7.2
## p_loo        13.4  2.6
## looic       123.0 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     41    93.2%   566       
##  (0.5, 0.7]   (ok)        2     4.5%   339       
##    (0.7, 1]   (bad)       1     2.3%   352       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with(c("education_field", "workplace_peer_review"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.3  6.7
## p_loo        14.4  2.5
## looic       128.6 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   806       
##  (0.5, 0.7]   (ok)        4     9.1%   1396      
##    (0.7, 1]   (bad)       1     2.3%   181       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.

Three variables

# PSIS-LOO comparison, round three: add the full three-variable model to the
# previously evaluated candidates. Model order again fixes the row labels in
# the printed comparison.
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  own_quality1.with(),
  
  # One-variable extensions from earlier rounds
  own_quality1.with("mo(education_level)", edlvl_prior),
  own_quality1.with("education_field"),
  own_quality1.with("workplace_peer_review"),
  
  # Two-variable extensions from the previous round
  own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
  own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
  
  # New model(s)
  own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior)
)

Comparison

loo_result[2]
## $diffs
##                                                                                                          elpd_diff
## own_quality1.with("mo(education_level)", edlvl_prior)                                                     0.0     
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"),     edlvl_prior)                    -0.3     
## own_quality1.with(c("mo(education_level)", "education_field"),     edlvl_prior)                          -0.6     
## own_quality.with()                                                                                       -1.0     
## own_quality1.with(c("mo(education_level)", "education_field",     "workplace_peer_review"), edlvl_prior) -1.4     
## own_quality1.with("education_field")                                                                     -2.7     
## own_quality1.with()                                                                                      -2.8     
## own_quality1.with("workplace_peer_review")                                                               -2.9     
##                                                                                                          se_diff
## own_quality1.with("mo(education_level)", edlvl_prior)                                                     0.0   
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"),     edlvl_prior)                     0.3   
## own_quality1.with(c("mo(education_level)", "education_field"),     edlvl_prior)                           0.5   
## own_quality.with()                                                                                        3.0   
## own_quality1.with(c("mo(education_level)", "education_field",     "workplace_peer_review"), edlvl_prior)  0.5   
## own_quality1.with("education_field")                                                                      2.8   
## own_quality1.with()                                                                                       2.7   
## own_quality1.with("workplace_peer_review")                                                                2.4

Diagnostics

loo_result[1]
## $loos
## $loos$`own_quality.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.2  6.8
## p_loo         8.2  1.6
## looic       124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     37    84.1%   901       
##  (0.5, 0.7]   (ok)        7    15.9%   1126      
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.0  6.7
## p_loo        13.3  2.6
## looic       128.0 13.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   1010      
##  (0.5, 0.7]   (ok)        5    11.4%   409       
##    (0.7, 1]   (bad)       1     2.3%   123       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.2  7.2
## p_loo        13.0  2.6
## looic       122.4 14.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   1199      
##  (0.5, 0.7]   (ok)        4     9.1%   352       
##    (0.7, 1]   (bad)       1     2.3%   218       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("education_field")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -63.9  6.7
## p_loo        13.9  2.5
## looic       127.8 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     38    86.4%   624       
##  (0.5, 0.7]   (ok)        4     9.1%   1059      
##    (0.7, 1]   (bad)       2     4.5%   198       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with("workplace_peer_review")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -64.1  6.8
## p_loo        13.7  2.5
## looic       128.3 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   665       
##  (0.5, 0.7]   (ok)        5    11.4%   230       
##    (0.7, 1]   (bad)       0     0.0%   <NA>      
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## 
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with(c("mo(education_level)", "education_field"),     edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.8  7.2
## p_loo        14.0  2.7
## looic       123.6 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     40    90.9%   564       
##  (0.5, 0.7]   (ok)        3     6.8%   952       
##    (0.7, 1]   (bad)       1     2.3%   306       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with(c("mo(education_level)", "workplace_peer_review"),     edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -61.5  7.2
## p_loo        13.4  2.6
## looic       123.0 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     41    93.2%   566       
##  (0.5, 0.7]   (ok)        2     4.5%   339       
##    (0.7, 1]   (bad)       1     2.3%   352       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`own_quality1.with(c("mo(education_level)", "education_field",     "workplace_peer_review"), edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -62.6  7.3
## p_loo        14.8  2.8
## looic       125.2 14.6
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     39    88.6%   1057      
##  (0.5, 0.7]   (ok)        4     9.1%   182       
##    (0.7, 1]   (bad)       1     2.3%   330       
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.

Candidate models

We pick some of our top-performing models as candidates and inspect them more closely.

The candidate models are named and listed in order of complexity.

OwnQuality0

We select the simplest model as a baseline.

# Candidate OwnQuality0 (baseline): cumulative (ordered-logit) model of the
# self-rated post-task quality with the task-outcome predictors and a varying
# intercept per participant session, but no demographic predictors.
# NOTE: the formula is given as a string and the fit is cached on disk with
# file_refit = "on_change", so any byte-level change to formula/priors/data
# would trigger a refit — the call is left exactly as written.
own_quality0 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session)",
  # Weakly informative priors matching those selected earlier in the file.
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),  # each Likert level is an incremental step
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality0",   # cache fitted model on disk
  file_refit = "on_change",     # refit only when model inputs change
  control = list(adapt_delta = 0.95),  # higher adapt_delta for careful sampling
  seed = 20210421  # fixed seed for reproducibility
)

Summary

summary(own_quality0)
##  Family: cumulative 
##   Links: mu = logit; disc = identity 
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) 
##    Data: as.data.frame(d.both_completed) (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.54      0.41     0.02     1.50 1.00     1377     1852
## 
## Population-Level Effects: 
##                              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1]                    -4.12      1.20    -6.52    -1.86 1.00     4382
## Intercept[2]                    -1.61      1.01    -3.55     0.44 1.00     5285
## Intercept[3]                     1.10      1.03    -0.85     3.18 1.00     4168
## Intercept[4]                     1.88      1.06    -0.08     4.08 1.00     4500
## Intercept[5]                     2.85      1.15     0.71     5.23 1.00     4646
## var_names_copied_good.ratio      0.18      0.68    -1.15     1.48 1.00     5869
## var_names_new_good.ratio        -0.20      0.76    -1.68     1.31 1.00     5669
## reused_logic_validationfalse    -1.26      0.67    -2.57     0.05 1.00     4748
## equals.existsFALSE              -0.11      0.58    -1.22     1.07 1.00     4360
## sonarqube_issues.s               0.15      0.32    -0.51     0.76 1.00     5294
## documentationIncorrect          -0.08      0.70    -1.42     1.25 1.00     4547
## documentationNone               -0.31      0.67    -1.62     0.99 1.00     4767
##                              Tail_ESS
## Intercept[1]                     2579
## Intercept[2]                     3155
## Intercept[3]                     2575
## Intercept[4]                     2813
## Intercept[5]                     2671
## var_names_copied_good.ratio      3184
## var_names_new_good.ratio         2806
## reused_logic_validationfalse     3010
## equals.existsFALSE               1868
## sonarqube_issues.s               3087
## documentationIncorrect           2868
## documentationNone                3423
## 
## Family Specific Parameters: 
##      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc     1.00      0.00     1.00     1.00   NA       NA       NA
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

ranef(own_quality0)
## $session
## , , Intercept
## 
##                              Estimate Est.Error       Q2.5     Q97.5
## 6033d69a5af2c702367b3a95  0.004665587 0.5271372 -1.1449885 1.1827776
## 6033d90a5af2c702367b3a96  0.023284686 0.5501864 -1.1246254 1.2622077
## 6034fc165af2c702367b3a98  0.227026451 0.6240559 -0.7973700 1.8220845
## 603500725af2c702367b3a99  0.200320329 0.5833540 -0.7840352 1.6579879
## 603f97625af2c702367b3a9d  0.182251311 0.5817026 -0.8139791 1.6569444
## 603fd5d95af2c702367b3a9e  0.215486492 0.5698347 -0.7538640 1.5917187
## 60409b7b5af2c702367b3a9f -0.277551611 0.6125458 -1.8649792 0.6653697
## 604b82b5a7718fbed181b336 -0.125033006 0.5701509 -1.5287796 0.9794940
## 6050c1bf856f36729d2e5218 -0.364047446 0.6917797 -2.1648627 0.6225998
## 6050e1e7856f36729d2e5219  0.288414457 0.6607054 -0.7358960 1.9325283
## 6055fdc6856f36729d2e521b -0.086786709 0.5699652 -1.4577966 1.0608163
## 60589862856f36729d2e521f  0.016301411 0.6324473 -1.3462922 1.3726167
## 605afa3a856f36729d2e5222  0.113368643 0.5679028 -0.9928262 1.4190855
## 605c8bc6856f36729d2e5223  0.123040831 0.5646846 -0.9752734 1.4696968
## 605f3f2d856f36729d2e5224 -0.261539668 0.6369305 -1.9414778 0.7675169
## 605f46c3856f36729d2e5225 -0.421105400 0.7302169 -2.3579902 0.5425406
## 60605337856f36729d2e5226 -0.035029702 0.5678189 -1.2934346 1.1889720
## 60609ae6856f36729d2e5228 -0.107957141 0.5530194 -1.4493475 0.9717934
## 6061ce91856f36729d2e522e  0.255057801 0.6157792 -0.7558465 1.8528779
## 6061f106856f36729d2e5231  0.037582109 0.5418064 -1.0923383 1.2626361
## 6068ea9f856f36729d2e523e  0.208912425 0.5985266 -0.8457582 1.7447481
## 6075ab05856f36729d2e5247 -0.444331583 0.7311070 -2.3089178 0.5186057

Sampling plots

plot(own_quality0, ask = FALSE)

Posterior predictive check

# Posterior predictive check: compare observed rating counts with 200
# posterior draws. brms deprecated the `nsamples` argument in favour of
# `ndraws`; using the current name avoids the deprecation warning the old
# call emitted.
pp_check(own_quality0, ndraws = 200, type = "bars")

OwnQuality1

We select the best performing model with one variable.

# Candidate OwnQuality1: baseline plus a monotonic (ordered) effect of
# education_level — the best-performing single extra variable in the LOO
# comparison. The dirichlet(2) simplex prior governs how the monotonic
# effect is divided across the education-level steps.
# Formula string and cached fit must stay byte-identical (file_refit =
# "on_change" re-fits on any change).
own_quality1 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session) +
    mo(education_level)",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd"),
    # Simplex prior for the monotonic education_level effect.
    prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
  ),
  family = cumulative(),
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality1",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)

Summary

summary(own_quality1)
##  Family: cumulative 
##   Links: mu = logit; disc = identity 
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level) 
##    Data: as.data.frame(d.both_completed) (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.40      0.34     0.01     1.25 1.00     2245     2330
## 
## Population-Level Effects: 
##                              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1]                    -5.53      1.33    -8.19    -2.96 1.00     4343
## Intercept[2]                    -2.98      1.18    -5.34    -0.73 1.00     4720
## Intercept[3]                     0.06      1.11    -2.17     2.19 1.00     5300
## Intercept[4]                     0.92      1.13    -1.27     3.10 1.00     5847
## Intercept[5]                     1.95      1.19    -0.34     4.31 1.00     5923
## var_names_copied_good.ratio      0.29      0.68    -1.03     1.60 1.00     5342
## var_names_new_good.ratio        -0.20      0.73    -1.62     1.22 1.00     6497
## reused_logic_validationfalse    -1.14      0.69    -2.48     0.20 1.00     5877
## equals.existsFALSE               0.00      0.59    -1.13     1.18 1.00     7504
## sonarqube_issues.s               0.33      0.33    -0.34     0.97 1.00     6788
## documentationIncorrect          -0.20      0.70    -1.60     1.18 1.00     5947
## documentationNone               -0.35      0.68    -1.65     0.95 1.00     5406
## moeducation_level               -0.62      0.24    -1.10    -0.14 1.00     4044
##                              Tail_ESS
## Intercept[1]                     3011
## Intercept[2]                     3165
## Intercept[3]                     3294
## Intercept[4]                     3301
## Intercept[5]                     3663
## var_names_copied_good.ratio      3350
## var_names_new_good.ratio         3030
## reused_logic_validationfalse     2850
## equals.existsFALSE               3325
## sonarqube_issues.s               3157
## documentationIncorrect           2731
## documentationNone                3139
## moeducation_level                3007
## 
## Simplex Parameters: 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1]     0.21      0.12     0.04     0.49 1.00     7442
## moeducation_level1[2]     0.29      0.15     0.05     0.61 1.00     6584
## moeducation_level1[3]     0.24      0.13     0.04     0.54 1.00     7079
## moeducation_level1[4]     0.26      0.14     0.05     0.56 1.00     6149
##                       Tail_ESS
## moeducation_level1[1]     2541
## moeducation_level1[2]     2675
## moeducation_level1[3]     2917
## moeducation_level1[4]     2774
## 
## Family Specific Parameters: 
##      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc     1.00      0.00     1.00     1.00   NA       NA       NA
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

ranef(own_quality1)
## $session
## , , Intercept
## 
##                              Estimate Est.Error       Q2.5     Q97.5
## 6033d69a5af2c702367b3a95  0.060421418 0.4474515 -0.8219355 1.1664143
## 6033d90a5af2c702367b3a96 -0.070607334 0.4486236 -1.1144652 0.8240244
## 6034fc165af2c702367b3a98  0.171986384 0.5122651 -0.6434738 1.5118743
## 603500725af2c702367b3a99  0.051399096 0.4717215 -0.9177683 1.1503399
## 603f97625af2c702367b3a9d  0.149034948 0.5164489 -0.7244956 1.4400761
## 603fd5d95af2c702367b3a9e  0.063851620 0.4693014 -0.8726579 1.1986143
## 60409b7b5af2c702367b3a9f -0.166169658 0.5097604 -1.4964202 0.6537067
## 604b82b5a7718fbed181b336  0.006109449 0.4636015 -1.0226025 1.0076475
## 6050c1bf856f36729d2e5218 -0.174832601 0.5141417 -1.4825647 0.6404709
## 6050e1e7856f36729d2e5219  0.128640507 0.4793011 -0.7086915 1.3221355
## 6055fdc6856f36729d2e521b -0.084090282 0.4906364 -1.3132208 0.8137954
## 60589862856f36729d2e521f  0.035911177 0.4906062 -0.9472696 1.1771830
## 605afa3a856f36729d2e5222  0.123931386 0.4821368 -0.7407700 1.3444923
## 605c8bc6856f36729d2e5223  0.200810030 0.5199002 -0.5847659 1.5845660
## 605f3f2d856f36729d2e5224 -0.136639502 0.4852726 -1.3773358 0.6892590
## 605f46c3856f36729d2e5225 -0.153429698 0.5083398 -1.5200230 0.6670267
## 60605337856f36729d2e5226  0.027154425 0.4910657 -0.9848317 1.1233308
## 60609ae6856f36729d2e5228 -0.071853728 0.4490669 -1.1682429 0.7967916
## 6061ce91856f36729d2e522e  0.113331195 0.4837503 -0.7028130 1.3565083
## 6061f106856f36729d2e5231 -0.053457133 0.4563305 -1.1416028 0.8943779
## 6068ea9f856f36729d2e523e  0.014520346 0.4376197 -0.8995343 1.0167069
## 6075ab05856f36729d2e5247 -0.189252397 0.5080556 -1.5618180 0.5871634

Sampling plots

plot(own_quality1, ask = FALSE)

Posterior predictive check

# Posterior predictive check for OwnQuality1 (200 posterior draws).
# Uses `ndraws`: the `nsamples` argument is deprecated in current brms and
# the old call emitted a deprecation warning.
pp_check(own_quality1, ndraws = 200, type = "bars")

OwnQuality2

We select the best performing model with two variables.

# Candidate OwnQuality2: OwnQuality1 plus education_field — the
# best-performing two-variable combination in the LOO comparison.
# Formula string and cached fit must stay byte-identical (file_refit =
# "on_change" re-fits on any change).
own_quality2 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session) +
    mo(education_level) +
    education_field",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd"),
    # Simplex prior for the monotonic education_level effect.
    prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
  ),
  family = cumulative(),
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality2",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)

Summary

summary(own_quality2)
##  Family: cumulative 
##   Links: mu = logit; disc = identity 
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level) + education_field 
##    Data: as.data.frame(d.both_completed) (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.43      0.34     0.01     1.25 1.00     2324     2224
## 
## Population-Level Effects: 
##                                    Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1]                          -5.40      1.42    -8.30    -2.68 1.00
## Intercept[2]                          -2.82      1.26    -5.35    -0.35 1.00
## Intercept[3]                           0.26      1.22    -2.16     2.62 1.00
## Intercept[4]                           1.11      1.25    -1.29     3.56 1.00
## Intercept[5]                           2.15      1.33    -0.41     4.88 1.00
## var_names_copied_good.ratio            0.29      0.68    -1.11     1.63 1.00
## var_names_new_good.ratio              -0.14      0.76    -1.61     1.42 1.00
## reused_logic_validationfalse          -1.16      0.67    -2.48     0.11 1.00
## equals.existsFALSE                     0.02      0.60    -1.13     1.18 1.00
## sonarqube_issues.s                     0.33      0.33    -0.34     0.93 1.00
## documentationIncorrect                -0.23      0.75    -1.76     1.24 1.00
## documentationNone                     -0.32      0.67    -1.62     0.99 1.00
## education_fieldInteractionDesign      -0.55      0.83    -2.19     1.07 1.00
## education_fieldNone                    0.05      0.86    -1.66     1.68 1.00
## education_fieldSoftwareEngineering     0.18      0.58    -0.93     1.29 1.00
## moeducation_level                     -0.60      0.25    -1.11    -0.12 1.00
##                                    Bulk_ESS Tail_ESS
## Intercept[1]                           5238     3056
## Intercept[2]                           5781     3357
## Intercept[3]                           6526     3252
## Intercept[4]                           6526     3251
## Intercept[5]                           6843     3411
## var_names_copied_good.ratio            5257     2958
## var_names_new_good.ratio               5628     2933
## reused_logic_validationfalse           6096     3450
## equals.existsFALSE                     6505     3089
## sonarqube_issues.s                     5480     2844
## documentationIncorrect                 5800     2840
## documentationNone                      6454     2924
## education_fieldInteractionDesign       6077     2829
## education_fieldNone                    6250     3102
## education_fieldSoftwareEngineering     7173     3918
## moeducation_level                      4320     3635
## 
## Simplex Parameters: 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1]     0.22      0.13     0.03     0.51 1.00     5329
## moeducation_level1[2]     0.28      0.15     0.05     0.61 1.00     6568
## moeducation_level1[3]     0.24      0.13     0.04     0.54 1.00     6008
## moeducation_level1[4]     0.26      0.14     0.04     0.55 1.00     5909
##                       Tail_ESS
## moeducation_level1[1]     2196
## moeducation_level1[2]     3083
## moeducation_level1[3]     3158
## moeducation_level1[4]     2610
## 
## Family Specific Parameters: 
##      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc     1.00      0.00     1.00     1.00   NA       NA       NA
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

ranef(own_quality2)
## $session
## , , Intercept
## 
##                             Estimate Est.Error       Q2.5     Q97.5
## 6033d69a5af2c702367b3a95  0.07257511 0.4853450 -0.8958344 1.2060655
## 6033d90a5af2c702367b3a96 -0.06719129 0.4655512 -1.1743671 0.8630083
## 6034fc165af2c702367b3a98  0.17905977 0.5349648 -0.6912089 1.5669707
## 603500725af2c702367b3a99  0.06251738 0.4870600 -0.8967308 1.2174737
## 603f97625af2c702367b3a9d  0.16271186 0.5225694 -0.7024703 1.5079167
## 603fd5d95af2c702367b3a9e  0.06217807 0.4687142 -0.8778565 1.1660744
## 60409b7b5af2c702367b3a9f -0.19689467 0.5279622 -1.5595090 0.6749143
## 604b82b5a7718fbed181b336 -0.01780412 0.4656230 -1.0741140 1.0102329
## 6050c1bf856f36729d2e5218 -0.20916246 0.5229909 -1.5947023 0.5935431
## 6050e1e7856f36729d2e5219  0.14039057 0.5056946 -0.7577146 1.4154486
## 6055fdc6856f36729d2e521b -0.07063987 0.5026330 -1.2795156 0.9609800
## 60589862856f36729d2e521f  0.03437286 0.5146656 -1.0082980 1.2447815
## 605afa3a856f36729d2e5222  0.16173501 0.5132568 -0.7044857 1.4873283
## 605c8bc6856f36729d2e5223  0.20520358 0.5392337 -0.6277315 1.6514052
## 605f3f2d856f36729d2e5224 -0.14931287 0.5247428 -1.5037436 0.7438351
## 605f46c3856f36729d2e5225 -0.19596157 0.5299663 -1.5836976 0.6472067
## 60605337856f36729d2e5226  0.02973059 0.4965926 -0.9911370 1.1770632
## 60609ae6856f36729d2e5228 -0.08101247 0.4916873 -1.2623611 0.8764413
## 6061ce91856f36729d2e522e  0.12577359 0.5007508 -0.7203002 1.3905577
## 6061f106856f36729d2e5231 -0.04526178 0.4834000 -1.1826973 0.9366807
## 6068ea9f856f36729d2e523e  0.02953905 0.4897309 -0.9708131 1.1652480
## 6075ab05856f36729d2e5247 -0.13579351 0.5219401 -1.4493301 0.7814682

Sampling plots

plot(own_quality2, ask = FALSE)

Posterior predictive check

# Posterior predictive check for OwnQuality2 (200 posterior draws).
# Uses `ndraws`: the `nsamples` argument is deprecated in current brms and
# the old call emitted a deprecation warning.
pp_check(own_quality2, ndraws = 200, type = "bars")

OwnQuality3

We select the best performing model with three variables.

# Candidate OwnQuality3: all three demographic predictors — monotonic
# education_level, education_field, and workplace_peer_review.
# Formula string and cached fit must stay byte-identical (file_refit =
# "on_change" re-fits on any change).
own_quality3 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session) +
    mo(education_level) +
    education_field +
    workplace_peer_review",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd"),
    # Simplex prior for the monotonic education_level effect.
    prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
  ),
  family = cumulative(),
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality3",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)

Summary

summary(own_quality3)
##  Family: cumulative 
##   Links: mu = logit; disc = identity 
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level) + education_field + workplace_peer_review 
##    Data: as.data.frame(d.both_completed) (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.44      0.35     0.02     1.30 1.00     2245     1980
## 
## Population-Level Effects: 
##                                    Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1]                          -5.39      1.42    -8.25    -2.72 1.00
## Intercept[2]                          -2.82      1.29    -5.38    -0.40 1.00
## Intercept[3]                           0.26      1.27    -2.31     2.74 1.00
## Intercept[4]                           1.12      1.29    -1.45     3.73 1.00
## Intercept[5]                           2.15      1.37    -0.53     4.96 1.00
## var_names_copied_good.ratio            0.29      0.68    -1.03     1.66 1.00
## var_names_new_good.ratio              -0.14      0.76    -1.64     1.35 1.00
## reused_logic_validationfalse          -1.14      0.67    -2.46     0.17 1.00
## equals.existsFALSE                     0.03      0.59    -1.09     1.20 1.00
## sonarqube_issues.s                     0.33      0.34    -0.35     0.98 1.00
## documentationIncorrect                -0.22      0.72    -1.64     1.21 1.00
## documentationNone                     -0.30      0.68    -1.65     1.06 1.00
## education_fieldInteractionDesign      -0.55      0.84    -2.20     1.08 1.00
## education_fieldNone                    0.06      0.86    -1.65     1.75 1.00
## education_fieldSoftwareEngineering     0.18      0.61    -0.98     1.39 1.00
## workplace_peer_reviewfalse            -0.07      0.59    -1.24     1.07 1.00
## moeducation_level                     -0.60      0.26    -1.12    -0.10 1.00
##                                    Bulk_ESS Tail_ESS
## Intercept[1]                           5079     2992
## Intercept[2]                           6292     3235
## Intercept[3]                           6699     3323
## Intercept[4]                           6665     3418
## Intercept[5]                           7096     3520
## var_names_copied_good.ratio            6212     3517
## var_names_new_good.ratio               6631     3251
## reused_logic_validationfalse           6912     3165
## equals.existsFALSE                     7206     3488
## sonarqube_issues.s                     6610     3267
## documentationIncorrect                 6579     3475
## documentationNone                      6114     2692
## education_fieldInteractionDesign       7672     3349
## education_fieldNone                    7927     3290
## education_fieldSoftwareEngineering     7484     3490
## workplace_peer_reviewfalse             6657     2938
## moeducation_level                      4353     3136
## 
## Simplex Parameters: 
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1]     0.22      0.12     0.03     0.50 1.00     7725
## moeducation_level1[2]     0.28      0.15     0.05     0.61 1.00     7058
## moeducation_level1[3]     0.24      0.14     0.03     0.55 1.00     7342
## moeducation_level1[4]     0.26      0.14     0.04     0.55 1.00     6524
##                       Tail_ESS
## moeducation_level1[1]     2572
## moeducation_level1[2]     2974
## moeducation_level1[3]     2944
## moeducation_level1[4]     3096
## 
## Family Specific Parameters: 
##      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc     1.00      0.00     1.00     1.00   NA       NA       NA
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

ranef(own_quality3)
## $session
## , , Intercept
## 
##                             Estimate Est.Error       Q2.5     Q97.5
## 6033d69a5af2c702367b3a95  0.07292083 0.4786193 -0.8662791 1.1845024
## 6033d90a5af2c702367b3a96 -0.07020497 0.4741531 -1.2300104 0.8881639
## 6034fc165af2c702367b3a98  0.18877404 0.5511362 -0.7364588 1.5693337
## 603500725af2c702367b3a99  0.07164407 0.5091508 -0.9227960 1.2644073
## 603f97625af2c702367b3a9d  0.16239819 0.5276444 -0.7347354 1.5347753
## 603fd5d95af2c702367b3a9e  0.08224825 0.4784688 -0.8329144 1.1904050
## 60409b7b5af2c702367b3a9f -0.19021499 0.5249898 -1.5781463 0.6258941
## 604b82b5a7718fbed181b336 -0.01420662 0.4912693 -1.1131575 1.1353241
## 6050c1bf856f36729d2e5218 -0.21692246 0.5396178 -1.6765630 0.5771092
## 6050e1e7856f36729d2e5219  0.15955201 0.5396768 -0.7411380 1.5334869
## 6055fdc6856f36729d2e521b -0.08413992 0.4985885 -1.3187810 0.8729798
## 60589862856f36729d2e521f  0.03754052 0.5382152 -1.0320576 1.3512681
## 605afa3a856f36729d2e5222  0.16252356 0.5305735 -0.7440951 1.5147409
## 605c8bc6856f36729d2e5223  0.20906027 0.5536733 -0.6712950 1.6124915
## 605f3f2d856f36729d2e5224 -0.16107686 0.5186055 -1.5039316 0.7075811
## 605f46c3856f36729d2e5225 -0.20919387 0.5512730 -1.6114661 0.6417108
## 60605337856f36729d2e5226  0.02487511 0.5096511 -1.0500033 1.1841955
## 60609ae6856f36729d2e5228 -0.07561048 0.4857031 -1.2397214 0.8791959
## 6061ce91856f36729d2e522e  0.11888767 0.5150776 -0.7873797 1.4099026
## 6061f106856f36729d2e5231 -0.06716551 0.4942525 -1.2691431 0.9238359
## 6068ea9f856f36729d2e523e  0.03607078 0.5193162 -1.0147701 1.2208109
## 6075ab05856f36729d2e5247 -0.15907814 0.5226057 -1.5145098 0.7168122

Sampling plots

plot(own_quality3, ask = FALSE)

Posterior predictive check

pp_check(own_quality3, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

Final model

All candidate models look nice and none is significantly better than the others; we will proceed with the model containing work experience, as it would otherwise be added in the next step: own_quality0

All data points

Some participants only completed one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can and will therefore try to fit the model with the complete dataset.

own_quality0.all <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session)",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),
  data = as.data.frame(d.completed),
  file = "fits/own_quality0.all",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)

Summary

summary(own_quality0.all)
##  Family: cumulative 
##   Links: mu = logit; disc = identity 
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) 
##    Data: as.data.frame(d.completed) (Number of observations: 51) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 29) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.52      0.40     0.01     1.44 1.00     1369     1807
## 
## Population-Level Effects: 
##                              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1]                    -4.79      1.24    -7.30    -2.46 1.00     3860
## Intercept[2]                    -3.50      1.07    -5.65    -1.42 1.00     5106
## Intercept[3]                    -1.43      0.98    -3.40     0.51 1.00     5524
## Intercept[4]                     1.46      1.00    -0.43     3.46 1.00     4704
## Intercept[5]                     2.20      1.04     0.20     4.27 1.00     5141
## Intercept[6]                     3.15      1.15     0.91     5.43 1.00     5281
## var_names_copied_good.ratio      0.19      0.67    -1.13     1.48 1.00     5208
## var_names_new_good.ratio        -0.03      0.75    -1.51     1.38 1.00     5992
## reused_logic_validationfalse    -1.30      0.65    -2.55    -0.01 1.00     5323
## equals.existsFALSE              -0.15      0.58    -1.29     1.02 1.00     5245
## sonarqube_issues.s               0.08      0.31    -0.57     0.67 1.00     5765
## documentationIncorrect          -0.16      0.64    -1.42     1.10 1.00     4830
## documentationNone               -0.30      0.60    -1.45     0.94 1.00     5136
##                              Tail_ESS
## Intercept[1]                     2992
## Intercept[2]                     3107
## Intercept[3]                     2701
## Intercept[4]                     3188
## Intercept[5]                     3258
## Intercept[6]                     2974
## var_names_copied_good.ratio      3367
## var_names_new_good.ratio         3114
## reused_logic_validationfalse     3515
## equals.existsFALSE               2902
## sonarqube_issues.s               3206
## documentationIncorrect           2932
## documentationNone                2934
## 
## Family Specific Parameters: 
##      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc     1.00      0.00     1.00     1.00   NA       NA       NA
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

ranef(own_quality0.all)
## $session
## , , Intercept
## 
##                              Estimate Est.Error       Q2.5     Q97.5
## 6033c6fc5af2c702367b3a93 -0.372408386 0.7841800 -2.4500565 0.6509531
## 6033d69a5af2c702367b3a95  0.040479850 0.5375009 -1.0863194 1.2587577
## 6033d90a5af2c702367b3a96  0.045434348 0.5189697 -1.0625645 1.2324326
## 6034fc165af2c702367b3a98  0.237236539 0.6271860 -0.7108012 1.8404439
## 603500725af2c702367b3a99  0.230201781 0.5779790 -0.7195627 1.6608285
## 603f84f15af2c702367b3a9b  0.016198984 0.5882814 -1.2750833 1.3550994
## 603f97625af2c702367b3a9d  0.212381556 0.5892914 -0.7669954 1.6730726
## 603fd5d95af2c702367b3a9e  0.237127902 0.6008399 -0.7474289 1.7871624
## 60409b7b5af2c702367b3a9f -0.204543679 0.5660094 -1.6241742 0.7465444
## 604b82b5a7718fbed181b336 -0.065847643 0.5548136 -1.3918616 1.0274548
## 604f1239a7718fbed181b33f  0.017712987 0.5674296 -1.2371173 1.2916858
## 6050c1bf856f36729d2e5218 -0.294707800 0.6393358 -1.9273966 0.6900997
## 6050e1e7856f36729d2e5219  0.302869045 0.6535337 -0.6555388 2.0016795
## 6055fdc6856f36729d2e521b -0.057627828 0.5651258 -1.3821086 1.1493950
## 60579f2a856f36729d2e521e -0.230201023 0.6270218 -1.8978505 0.7629862
## 60589862856f36729d2e521f  0.026791893 0.6286982 -1.3312424 1.4598502
## 605a30a7856f36729d2e5221  0.003214293 0.5553910 -1.2410379 1.2123805
## 605afa3a856f36729d2e5222  0.143033731 0.5647349 -0.8942491 1.4892687
## 605c8bc6856f36729d2e5223  0.140888711 0.5391848 -0.8319169 1.4732160
## 605f3f2d856f36729d2e5224 -0.206526332 0.5789037 -1.6890799 0.7750839
## 605f46c3856f36729d2e5225 -0.377421560 0.6629626 -2.1120662 0.5013496
## 60605337856f36729d2e5226 -0.007595143 0.5488059 -1.2171280 1.1694757
## 60609ae6856f36729d2e5228 -0.077252498 0.5197291 -1.2715670 0.9599132
## 6061ce91856f36729d2e522e  0.266236684 0.6018718 -0.6589558 1.8098424
## 6061f106856f36729d2e5231  0.059041868 0.5308058 -1.0556227 1.2695254
## 60672faa856f36729d2e523c  0.018507918 0.5849574 -1.2720438 1.2728496
## 6068ea9f856f36729d2e523e  0.229267136 0.6015340 -0.7346368 1.7698431
## 606db69d856f36729d2e5243  0.010330534 0.5718739 -1.2485315 1.2439217
## 6075ab05856f36729d2e5247 -0.390393987 0.6810910 -2.2353987 0.4875631

Sampling plots

plot(own_quality0.all, ask = FALSE)

Posterior predictive check

pp_check(own_quality0.all, nsamples = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

Final model

  • Fitting the model to all data points did not significantly damage the model, and it will be used as it is a fairer representation of reality.

This means that our final model, with all data points and experience predictors, is own_quality0.all

Interpreting the model

To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected, we will mainly analyze the \(\beta\) parameters.

\(\beta\) parameters

mcmc_areas(own_quality0.all, 
           pars = c(
             "b_var_names_copied_good.ratio", 
             "b_var_names_new_good.ratio", 
             "b_reused_logic_validationfalse",
             "b_equals.existsFALSE",
             "b_sonarqube_issues.s",
             "b_documentationIncorrect",
             "b_documentationNone"
                    ),
           prob = 0.95) + scale_y_discrete() +
  scale_y_discrete(labels=c(
    "Ratio of good copied var names",
    "Ratio of good new var names",
    "Duplicated validation logic",
    "Missing equals implementation",
    "Amount of sonarqube issues",
    "Incorrect documentation",
    "No documentation"
    )) +
  ggtitle("Beta parameters densities in self assesed quality model", subtitle = "Shaded region marks 95% of the density. Line marks the median")

As we have a lot of effects playing small roles, we will simulate two scenarios: one where the developer, according to us, does well and one where the developer does not do so well, and see if the participants' ratings agree with us.

scale_programming_experience <- function(x) {
  (x - mean(d.completed$work_experience_programming))/ sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
  x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}

post_settings <- data_frame(
  var_names_copied_good.ratio = c(0.9, 0.5),
  var_names_new_good.ratio = c(0.9, 0.5),
  reused_logic_validation = c("true", "false"),
  equals.exists = c("TRUE", "FALSE"),
  sonarqube_issues.s = c(-1, 1),
  documentation = c("Correct", "Incorrect"),
  session = NA
)

post <- posterior_predict(own_quality0.all, newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var= "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(submission = revalue(reused_logic_validation, c("true" = "Good", "false" = "Bad"))) %>%
  select(
    estimate,
    submission
  )
post.nice <- post %>%  mutate_at("estimate", function(x) revalue(as.ordered(x), c(
      "1"="Very Bad",
      "2"="Bad",
      "3"="Somewhat Bad",
      "4"="Neutral",
      "5"="Somewhat Good",
      "6"="Good",
      "7"="Very Good"
    )))
    


vline.data <- post.nice %>%
              group_by(submission) %>%
                    summarize(z = mean(as.numeric(estimate)))

sprintf("Estimations for 3 years experience")
## [1] "Estimations for 3 years experience"
post.nice %>%
    ggplot() +
      geom_histogram(aes(x=estimate),fill= "Light Blue", stat = "count") +
      geom_vline(aes(xintercept = z),
            vline.data,
             col = "Dark Blue",
             lwd = 1)+
      facet_grid(rows = vars(submission)) +
        scale_y_continuous(limits = NULL, breaks = sapply(c(0.1, 0.2, 0.3, 0.4, 0.5), function(x) x*nrow(post.nice) / 2), labels = c("10%","20%","30%","40%", "50%")) +
      theme(axis.title.x=element_blank(),
            axis.title.y=element_blank())

post.diff <- post %>% filter(submission == "Bad")
post.diff$estimate = post.diff$estimate -  filter(post, submission == "Good")$estimate

post.diff %>%
  ggplot(aes(x=estimate)) +
  geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
  labs(
    title = "Submission rating diff",
    subtitle = "Difference as: bad submission rating - good submission rating",
    x = "Rating difference"
  ) +
  scale_y_continuous(breaks = NULL)

We can then proceed to calculate some likelihoods:

bad_rated_higher <- sum(post.diff < 0)
good_rated_higher <- sum(post.diff > 0)
x <- good_rated_higher / bad_rated_higher
x
## [1] 2.24108

Participants were 124% more likely to rate the bad submission as worse than they were to rate the good submission as worse.

---
title: "Self-Reported Submission Quality Model"
author: Hampus Broman & William Levén
date: 2021-05
output: 
  html_document: 
    pandoc_args: [ "-o", "docs/self-reported_submission_quality.html" ]
---

```{r include-setup, include=FALSE}
# Load the shared setup: purl() extracts the R code from setup.Rmd into a
# temporary script, which is then sourced into this document's environment.
source(knitr::purl('setup.Rmd', output = tempfile()))
```



## Looking at the data

Looks like there is a slightly higher rate of negative ratings for the high debt group, but also an even smaller increase in positive ratings.

```{r}

# Keep only the treatment indicator and the participant's post-task
# self-assessed quality rating (a -3..3 Likert response).
likert.data <- d.both_completed %>%
  select(high_debt_version, quality_post_task)

# Map the numeric Likert codes to human-readable labels for plotting.
likert.data$quality_post_task <- revalue(likert.data$quality_post_task, c(
      "-3"="Very Bad",
      "-2"="Bad",
      "-1"="Somewhat Bad",
      "0"="Neutral",
      "1"="Somewhat Good",
      "2"="Good",
      "3"="Very Good"
    ))

# Relabel the treatment levels so the facet strips read "High/Low Debt".
likert.data$high_debt_version <- revalue(likert.data$high_debt_version, c(
      "true"="High Debt",
      "false"="Low Debt"
    ))

# Bar chart of rating counts, one facet row per debt condition.
ggplot(likert.data, aes(x=quality_post_task)) +
  geom_bar(fill= "Light Blue") +
  facet_grid(rows = vars(high_debt_version)) +
    scale_y_continuous(limits = NULL, breaks = c(2,4,6,8), labels = c("2","4","6","8")) +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank())
```

## Initial model
As the data is collected from a likert scale we will use a cumulative family, indicating that each level on the scale is an incremental step. This model is also able to fit the data well.

We include `high_debt_version` as a predictor in our model as this variable represents the very effect we want to measure.
We also include a varying intercept for each individual to prevent the model from learning too much from single participants with extreme measurements.

### Selecting priors {.tabset}

We iterate over the model until we have sane priors.

```{r initial-model-definition, class.source = 'fold-show'}
# Base model factory: self-assessed quality (ordinal Likert outcome)
# explained by an intercept plus a varying intercept per participant
# session. extendable_model() (project helper) returns a closure that can
# refit the model with additional predictors passed as arguments.
own_quality.with <- extendable_model(
  base_name = "own_quality",
  base_formula = "quality_post_task ~ 1  + (1 | session)",
    base_priors = c(
    prior(normal(0, 2.5), class = "Intercept"),
    # No population-level effects in the base formula, so the "b" prior is
    # commented out here; extensions pass beta_prior explicitly instead.
    #prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  # Cumulative family: treats each Likert level as an incremental step.
  family = cumulative(),
  data = d.both_completed,
  # Raised adapt_delta to reduce divergent transitions during sampling.
  base_control = list(adapt_delta = 0.95)
)
```

#### Default priors

```{r default-priors}
prior_summary(own_quality.with(only_priors= TRUE))
```

#### Selected priors

```{r selected-priors, warning=FALSE}
prior_summary(own_quality.with(sample_prior = "only"))
```

#### Prior predictive check

```{r priors-check, warning=FALSE}
# Prior predictive check. 'nsamples' is deprecated in recent brms; 'ndraws'
# is the direct replacement (number of posterior draws used for the check).
pp_check(own_quality.with(sample_prior = "only"), ndraws = 200, type = "bars")
```

#### Beta parameter influence

We choose a beta parameter prior that allows the beta parameter to account for 100% of the effect but is skeptical of such strong effects from the beta parameter.

```{r priors-beta, warning=FALSE}
# Simulate the prior-implied influence of a single beta coefficient: draw
# intercepts and betas from their priors and compare the implied outcome
# probability with and without the beta term.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 0, 2.5)
sim.beta <- rnorm(sim.size, 0, 1)
# Percent change in the implied probability when the beta effect is added;
# 0 means no influence, +/-100 means the probability doubles/vanishes.
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100

data.frame(x = sim.beta.diff) %>%
  ggplot(aes(x)) +
  geom_density() +
  xlim(-150, 150) +
  labs(
    title = "Beta parameter prior influence",
    x = "Estimate with beta as % of estimate without beta",
    y = "Density"
  )

```

### Model fit {.tabset}

We check the posterior distribution and can see that the model seems to have been able to fit the data well. 
Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.

#### Posterior predictive check

```{r base-pp-check}
# 'nsamples' is deprecated in recent brms; use 'ndraws' (same meaning).
pp_check(own_quality.with(), ndraws = 200, type = "bars")
```

#### Summary

```{r base-summary}
summary(own_quality.with())
```

#### Sampling plots

```{r base-plot}
plot(own_quality.with(), ask = FALSE)
```

## Model quality extensions {.tabset}

```{r mo-priors}
# Default prior for the monotonic education_level predictor
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
# Default prior for added population-level (beta) effects
beta_prior <- prior(normal(0, 1), class = "b")
```

We use `loo` to check some possible extensions on the model.

### Variable names {.tabset}

```{r model-extension-q1, warning=FALSE, class.source = 'fold-show'}
# Compare the base model against extensions adding variable-naming quality
# predictors, via approximate leave-one-out cross-validation (loo).
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  own_quality.with("var_names_new_good.ratio", beta_prior),
  own_quality.with("var_names_copied_good.ratio", beta_prior),
  own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior)
)
```

#### Comparison

```{r model-extension-q1-sum, warning=FALSE}
loo_result[2]
```

#### Diagnostics

```{r model-extension-q1-dig, warning=FALSE}
loo_result[1]
```

### Reuse {.tabset}

```{r model-extension-q2, warning=FALSE, class.source = 'fold-show'}
# Compare extensions adding logic-reuse predictors (whether constructor /
# validation logic was reused rather than duplicated).
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  own_quality.with("reused_logic_constructor", beta_prior),
  own_quality.with("reused_logic_validation", beta_prior),
  own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior)
)
```

#### Comparison

```{r model-extension-q2-sum, warning=FALSE}
loo_result[2]
```

#### Diagnostics

```{r model-extension-q2-dig, warning=FALSE}
loo_result[1]
```

### Utility methods {.tabset}

```{r model-extension-q3, warning=FALSE, class.source = 'fold-show'}
# Compare extensions adding utility-method predictors (equals/hashCode).
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  own_quality.with("equals.exists", beta_prior),
  own_quality.with("hashcode.exists", beta_prior),
  # NOTE(review): the combined model uses "hashcode.state" while the
  # single-predictor model above uses "hashcode.exists" — confirm intentional.
  own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)
)
```

#### Comparison

```{r model-extension-q3-sum, warning=FALSE}
loo_result[2]
```

#### Diagnostics

```{r model-extension-q3-dig, warning=FALSE}
loo_result[1]
```

### Other quality attributes {.tabset}

```{r model-extension-q4, warning=FALSE, class.source = 'fold-show'}
# Compare extensions adding other quality attributes.
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  
  # New model(s)
  # NOTE(review): unlike the sibling extension chunks, no beta_prior is
  # passed here, so these fits use the default "b" prior — confirm intentional.
  own_quality.with("sonarqube_issues.s"),
  own_quality.with("documentation")
)
```

#### Comparison

```{r model-extension-q4-sum, warning=FALSE}
loo_result[2]
```

#### Diagnostics

```{r model-extension-q4-dig, warning=FALSE}
loo_result[1]
```

## Model predictor extensions {.tabset}

We create a new base model based on what we learned from extending the previous model with different quality measurements. We take care to not include redundant quality indicators.

```{r initial-model-definition-2, class.source = 'fold-show'}
# Second-stage model factory: the base formula now includes the quality
# indicators selected in the previous section (redundant ones excluded),
# plus the varying intercept per session. Extensions add demographic /
# experience predictors on top of this.
own_quality1.with <- extendable_model(
  base_name = "own_quality1",
  base_formula = "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session)",
    base_priors = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),
  data = data.frame(d.both_completed),
  base_control = list(adapt_delta = 0.95)
)
```

### One variable {.tabset}

```{r model-extension-1, warning=FALSE, class.source = 'fold-show'}
# Screen candidate demographic / experience / workplace predictors one at
# a time against both benchmark models.
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  own_quality1.with(),
  
  # New model(s)
  own_quality1.with("work_domain"),
  own_quality1.with("work_experience_programming.s"),
  own_quality1.with("work_experience_java.s"),
  own_quality1.with("education_field"),
  own_quality1.with("mo(education_level)", edlvl_prior),
  own_quality1.with("workplace_peer_review"),
  own_quality1.with("workplace_td_tracking"),
  own_quality1.with("workplace_pair_programming"),
  own_quality1.with("workplace_coding_standards"),
  own_quality1.with("scenario"),
  own_quality1.with("group")
)
```

#### Comparison

```{r model-extension-1-sum, warning=FALSE}
loo_result[2]
```

#### Diagnostics

```{r model-extension-1-dig, warning=FALSE}
loo_result[1]
```

### Two variables {.tabset}

```{r model-extension-2, warning=FALSE, class.source = 'fold-show'}
# Combine the best single-variable extensions pairwise and compare against
# the benchmarks and the single-variable models.
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  own_quality1.with(),
  
  own_quality1.with("mo(education_level)", edlvl_prior),
  own_quality1.with("education_field"),
  own_quality1.with("workplace_peer_review"),
  
  # New model(s)
  own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
  own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
  own_quality1.with(c("education_field", "workplace_peer_review"))
)
```

#### Comparison

```{r model-extension-2-sum, warning=FALSE}
loo_result[2]
```

#### Diagnostics

```{r model-extension-2-dig, warning=FALSE}
loo_result[1]
```

### Three variables {.tabset}

```{r model-extension-3, warning=FALSE, class.source = 'fold-show'}
# Add all three retained predictors together and compare against the
# benchmarks and the smaller combinations.
loo_result <- loo(
  # Benchmark model(s)
  own_quality.with(),
  own_quality1.with(),
  
  own_quality1.with("mo(education_level)", edlvl_prior),
  own_quality1.with("education_field"),
  own_quality1.with("workplace_peer_review"),
  
  own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
  own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
  
  # New model(s)
  own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior)
)
```

#### Comparison

```{r model-extension-3-sum, warning=FALSE}
loo_result[2]
```

#### Diagnostics

```{r model-extension-3-dig, warning=FALSE}
loo_result[1]
```

## Candidate models  {.tabset}
We pick some of our top performing models as candidates and inspect them closer.

The candidate models are named and listed in order of complexity.

### OwnQuality0  {.tabset}

We select the simplest model as a baseline.

```{r own_quality0, class.source = 'fold-show', warning=FALSE, message=FALSE}
# Candidate model 0 (baseline): quality indicators only, with a varying
# intercept per session. The fit is cached to disk ("file") and only refit
# when the model specification changes ("on_change"). Fixed seed for
# reproducible sampling.
own_quality0 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session)",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality0",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)
```

#### Summary

```{r own_quality0-sum}
summary(own_quality0)
```

#### Random effects

```{r own_quality0-raneff}
ranef(own_quality0)
```

#### Sampling plots

```{r own_quality0-plot}
plot(own_quality0, ask = FALSE)
```

#### Posterior predictive check

```{r own_quality0-pp}
# 'nsamples' is deprecated in recent brms; use 'ndraws' (same meaning).
pp_check(own_quality0, ndraws = 200, type = "bars")
```

### OwnQuality1  {.tabset}

We select the best performing model with one variable.

```{r own_quality1, class.source = 'fold-show', warning=FALSE, message=FALSE}
# Candidate model 1: baseline plus a monotonic effect of education_level
# (mo() treats the ordered levels as incremental steps with a dirichlet
# prior on the step sizes). Cached to disk; fixed seed for reproducibility.
own_quality1 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session) +
    mo(education_level)",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd"),
    prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
  ),
  family = cumulative(),
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality1",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)
```

#### Summary

```{r own_quality1-sum}
summary(own_quality1)
```

#### Random effects

```{r own_quality1-raneff}
ranef(own_quality1)
```

#### Sampling plots

```{r own_quality1-plot}
plot(own_quality1, ask = FALSE)
```

#### Posterior predictive check

```{r own_quality1-pp}
# 'nsamples' is deprecated in recent brms; use 'ndraws' (same meaning).
pp_check(own_quality1, ndraws = 200, type = "bars")
```

### OwnQuality2  {.tabset}

We select the best performing model with two variables.

```{r own_quality2, class.source = 'fold-show', warning=FALSE, message=FALSE}
# Candidate model 2: candidate model 1 plus education_field. Cached to
# disk; fixed seed for reproducibility.
own_quality2 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session) +
    mo(education_level) +
    education_field",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd"),
    prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
  ),
  family = cumulative(),
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality2",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)
```

#### Summary

```{r own_quality2-sum}
summary(own_quality2)
```

#### Random effects

```{r own_quality2-raneff}
ranef(own_quality2)
```

#### Sampling plots

```{r own_quality2-plot}
plot(own_quality2, ask = FALSE)
```

#### Posterior predictive check

```{r own_quality2-pp}
# 'nsamples' is deprecated in recent brms; use 'ndraws' (same meaning).
pp_check(own_quality2, ndraws = 200, type = "bars")
```

### OwnQuality3  {.tabset}

We select the best performing model with three variables.

```{r own_quality3, class.source = 'fold-show', warning=FALSE, message=FALSE}
# Candidate model 3: candidate model 2 plus workplace_peer_review. Cached
# to disk; fixed seed for reproducibility.
own_quality3 <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session) +
    mo(education_level) +
    education_field +
    workplace_peer_review",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd"),
    prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
  ),
  family = cumulative(),
  data = as.data.frame(d.both_completed),
  file = "fits/own_quality3",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)
```

#### Summary

```{r own_quality3-sum}
summary(own_quality3)
```

#### Random effects

```{r own_quality3-raneff}
ranef(own_quality3)
```

#### Sampling plots

```{r own_quality3-plot}
plot(own_quality3, ask = FALSE)
```

#### Posterior predictive check

```{r own_quality3-pp}
# 'nsamples' is deprecated in recent brms; use 'ndraws' (same meaning).
pp_check(own_quality3, ndraws = 200, type = "bars")
```

## Final model 
All candidate models look nice and none is significantly better than the others; we will proceed with the model containing work experience, as it would otherwise be added in the next step: `own_quality0`


### All data points {.tabset}

Some participants only completed one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can and will therefore try to fit the model with the complete dataset.

```{r variation.all, class.source = 'fold-show'}
# Final-model refit: same specification as own_quality0 but fit on
# d.completed, which also includes participants who completed only one
# scenario. Cached to disk; fixed seed for reproducibility.
own_quality0.all <- brm(
  "quality_post_task ~ 1 + 
    var_names_copied_good.ratio + 
    var_names_new_good.ratio + 
    reused_logic_validation + 
    equals.exists + 
    sonarqube_issues.s + 
    documentation + 
    (1 | session)",
  prior = c(
    prior(normal(0, 2.5), class = "Intercept"),
    prior(normal(0, 1), class = "b"),
    prior(exponential(1), class = "sd")
  ),
  family = cumulative(),
  data = as.data.frame(d.completed),
  file = "fits/own_quality0.all",
  file_refit = "on_change",
  control = list(adapt_delta = 0.95),
  seed = 20210421
)
```

#### Summary

```{r variation.all-sum}
# Population-level estimates and sampling diagnostics for the all-data fit.
summary(own_quality0.all)
```

#### Random effects

```{r variation.all-raneff}
# Per-session varying-intercept estimates for the all-data fit.
ranef(own_quality0.all)
```

#### Sampling plots

```{r variation.all-plot}
# Trace and density plots for the all-data fit.
plot(own_quality0.all, ask = FALSE)
```

#### Posterior predictive check

```{r variation.all-pp}
# Posterior predictive check for the all-data fit.
# NOTE(review): `nsamples` is deprecated in newer brms (use `ndraws`).
pp_check(own_quality0.all, nsamples = 200, type = "bars")
```

### Final model
* Fitting the model to all data points did not significantly damage the model, and this fit will be used since it is a fairer representation of reality.

This means that our final model, with all data points and experience predictors, is `own_quality0.all`

## Interpreting the model
To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected, we will mainly analyze the $\beta$ parameters.

### $\beta$ parameters
```{r interpret-beta-plot, warning=FALSE, message=FALSE}
# Posterior densities for the population-level (beta) coefficients of the
# final model, with 95% of the density shaded. The human-readable labels
# below are given in the same order as `pars`.
# Fix: the original chained a bare scale_y_discrete() before the labelled
# one; the second call replaced the first and ggplot2 emitted a
# "Scale for 'y' is already present" message. Only the labelled scale is
# kept. The title typo "assesed" is also corrected.
mcmc_areas(own_quality0.all,
           pars = c(
             "b_var_names_copied_good.ratio",
             "b_var_names_new_good.ratio",
             "b_reused_logic_validationfalse",
             "b_equals.existsFALSE",
             "b_sonarqube_issues.s",
             "b_documentationIncorrect",
             "b_documentationNone"
           ),
           prob = 0.95) +
  scale_y_discrete(labels = c(
    "Ratio of good copied var names",
    "Ratio of good new var names",
    "Duplicated validation logic",
    "Missing equals implementation",
    "Amount of sonarqube issues",
    "Incorrect documentation",
    "No documentation"
  )) +
  ggtitle("Beta parameters densities in self assessed quality model",
          subtitle = "Shaded region marks 95% of the density. Line marks the median")
```

As we have a lot of effects playing small roles, we will simulate two scenarios: one where the developer, according to us, does well, and one where the developer does not do so well, and then see if the participants' ratings agree with ours.


```{r interpret-post, message=FALSE, warning=FALSE}
# Standardise a raw years-of-programming-experience value using the mean
# and standard deviation of the full dataset (d.completed, looked up from
# the calling environment).
scale_programming_experience <- function(x) {
  mu <- mean(d.completed$work_experience_programming)
  sigma <- sd(d.completed$work_experience_programming)
  (x - mu) / sigma
}
# Inverse of scale_programming_experience: map a standardised value back to
# raw years of experience.
unscale_programming_experience <- function(x) {
  mu <- mean(d.completed$work_experience_programming)
  sigma <- sd(d.completed$work_experience_programming)
  x * sigma + mu
}

# Two hypothetical submissions for posterior prediction: row 1 is a "good"
# submission (mostly good variable names, reused validation logic, equals
# implemented, few sonarqube issues, correct documentation) and row 2 a
# "bad" one. `session = NA` predicts for a new, unseen participant.
# Fix: `data_frame()` has long been deprecated in the tibble package (and
# is defunct in recent releases); `tibble()` is the drop-in replacement.
post_settings <- tibble(
  var_names_copied_good.ratio = c(0.9, 0.5),
  var_names_new_good.ratio = c(0.9, 0.5),
  reused_logic_validation = c("true", "false"),
  equals.exists = c("TRUE", "FALSE"),
  sonarqube_issues.s = c(-1, 1),
  documentation = c("Correct", "Incorrect"),
  session = NA
)

# Draw posterior predictions for both hypothetical submissions, reshape the
# draw matrix (draws x settings rows) into long format with melt(), and
# join back the settings row each prediction came from. `submission` is
# derived from reused_logic_validation, which differs between the two rows
# ("true" = the good submission, "false" = the bad one).
post <- posterior_predict(own_quality0.all, newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var= "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(submission = revalue(reused_logic_validation, c("true" = "Good", "false" = "Bad"))) %>%
  select(
    estimate,
    submission
  )
# Map the numeric outcome categories (1-7) back onto the Likert labels,
# keeping the ordering via as.ordered().
post.nice <- post %>%  mutate_at("estimate", function(x) revalue(as.ordered(x), c(
      "1"="Very Bad",
      "2"="Bad",
      "3"="Somewhat Bad",
      "4"="Neutral",
      "5"="Somewhat Good",
      "6"="Good",
      "7"="Very Good"
    )))
    


# Mean predicted rating (on the numeric 1-7 scale) per submission type,
# used to draw a vertical reference line in each facet.
vline.data <- post.nice %>%
              group_by(submission) %>%
                    summarize(z = mean(as.numeric(estimate)))

# NOTE(review): sprintf() with no format arguments just echoes the string;
# also this model has no experience predictor visible here, so the
# "3 years experience" caption looks stale -- confirm intent.
sprintf("Estimations for 3 years experience")

# Histogram of predicted ratings per submission type; the y axis is
# relabelled as a percentage of draws (each facet holds half the rows of
# post.nice, hence the division by 2 in the break positions).
post.nice %>%
    ggplot() +
      geom_histogram(aes(x=estimate),fill= "Light Blue", stat = "count") +
      geom_vline(aes(xintercept = z),
            vline.data,
             col = "Dark Blue",
             lwd = 1)+
      facet_grid(rows = vars(submission)) +
        scale_y_continuous(limits = NULL, breaks = sapply(c(0.1, 0.2, 0.3, 0.4, 0.5), function(x) x*nrow(post.nice) / 2), labels = c("10%","20%","30%","40%", "50%")) +
      theme(axis.title.x=element_blank(),
            axis.title.y=element_blank())
```

```{r effect-size-diff, warning=FALSE, message=FALSE}
# Per-draw rating difference: bad-submission rating minus good-submission
# rating, matched by row order (both filtered subsets keep the original
# draw ordering from posterior_predict).
post.diff <- post %>% filter(submission == "Bad")
post.diff$estimate = post.diff$estimate -  filter(post, submission == "Good")$estimate

# Boxplot of the rating differences.
# NOTE(review): quantile_lines/quantile_fun/vline_linetype are not
# arguments of ggplot2::geom_boxplot (they belong to ggridges geoms) and
# appear to be passed through unused here -- confirm intent.
post.diff %>%
  ggplot(aes(x=estimate)) +
  geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
  labs(
    title = "Submission rating diff",
    subtitle = "Difference as: bad submission rating - good submission rating",
    x = "Rating difference"
  ) +
  scale_y_continuous(breaks = NULL)
```

We can then proceed to calculate some likelihoods:

```{r, class.source = 'fold-show'}
# Count posterior draws in which the bad submission was rated higher
# (diff < 0) vs. lower (diff > 0) than the good one; ties (diff == 0) are
# excluded from both counts. `x` is the ratio reported in the text below.
# Fix: compare the `estimate` column explicitly. The original compared the
# whole data frame (`sum(post.diff < 0)`), which also string-compared the
# character `submission` column against 0 and only gave the right answer
# by accident of collation order.
bad_rated_higher <- sum(post.diff$estimate < 0)
good_rated_higher <- sum(post.diff$estimate > 0)
x <- good_rated_higher / bad_rated_higher
x
```
Participants were `r scales::label_percent()(x - 1)` more likely to rate the bad submission as worse than they were to rate the good submission as worse. 
