Looking at the data

There appears to be a significant difference in the rate of bad variable naming between the low and high debt groups.

New variable names

# Box plot of the good-naming ratio for newly introduced variables,
# split by debt-level treatment group.
d.both_completed %>%
  ggplot(aes(x = var_names_new_good.ratio, fill = high_debt_version)) +
  geom_boxplot() +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(
    guide = guide_legend(reverse = TRUE),
    name = "Debt level",
    labels = c("High debt", "Low debt"),
    values = c("#7070FF", "lightblue")
  ) +
  labs(
    x = "Ratio of good variable name selection",
    title = "Distribution of good variable naming rate (new variables)"
  )

Copied variable names

# Box plot of the good-naming ratio for variables copied from the scenario,
# split by debt-level treatment group.
d.both_completed %>%
  ggplot(aes(x = var_names_copied_good.ratio, fill = high_debt_version)) +
  geom_boxplot() +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(
    guide = guide_legend(reverse = TRUE),
    name = "Debt level",
    labels = c("High debt", "Low debt"),
    values = c("#7070FF", "lightblue")
  ) +
  labs(
    x = "Ratio of good variable name selection",
    title = "Distribution of good variable naming rate (copied variables)"
  )

Descriptive statistics:

New variable names

# Five-number summary (plus mean) of the good-naming ratio for new variables.
summary(pull(d.both_completed, var_names_new_good.ratio))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  1.0000  1.0000  0.8425  1.0000  1.0000
# Sample variance of the same ratio, formatted to two decimal places.
d.both_completed %>%
  pull(var_names_new_good.ratio) %>%
  var() %>%
  sprintf("Variance: %.2f", .)
## [1] "Variance: 0.10"

Copied variable names

# Five-number summary (plus mean) of the good-naming ratio for copied variables.
summary(pull(d.both_completed, var_names_copied_good.ratio))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1215  1.0000  0.6556  1.0000  1.0000
# Sample variance of the same ratio, formatted to two decimal places.
d.both_completed %>%
  pull(var_names_copied_good.ratio) %>%
  var() %>%
  sprintf("Variance: %.2f", .)
## [1] "Variance: 0.18"

Initial model

Variable names are modeled using the binomial family, where the number of trials is the total number of new/copied variable names.

We include high_debt_version as well as a varying intercept for each individual in our initial model.

Selecting priors

As the data represents a series of Bernoulli trials, we chose a binomial model.

We iterate over the model until we have sane priors that are able to fit the data reasonably well. The prior “lkj(2)” will mean the model is skeptical of strong correlations.

Base model with priors

# Binomial model of good new-variable naming: the number of good names out of
# all new names, with a fixed effect for debt version and a varying intercept
# per participant (session).
variable_names.with <- extendable_model(
  base_name = "variable_names",
  base_formula = "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1  | session)",
  data = d.both_completed,
  family = binomial(),
  base_priors = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  # raised adapt_delta to reduce divergent transitions during sampling
  base_control = list(adapt_delta = 0.95)
)

Default priors

# Show the default priors brms would assign for this formula (no fitting).
prior_summary(variable_names.with(only_priors = TRUE))

Selected priors

# Show the priors actually used, fitting with samples drawn from the prior only.
prior_summary(variable_names.with(sample_prior = "only"))

Prior predictive check

# Prior predictive check: draws from the priors only, overlaid on the data.
# 'nsamples' is deprecated in current brms (see the warning emitted by the
# posterior pp_check below); use 'ndraws' instead.
pp_check(variable_names.with(sample_prior = "only"), ndraws = 200)

Beta parameter influence

We choose a beta parameter prior that allows the beta parameter to account for 50% of the effect but that is skeptical of such strong effects from the beta parameter.

# Simulate the prior-implied influence of the beta (debt version) parameter:
# draw intercept and beta from their priors and express the estimate with
# beta as a percentage change relative to the estimate without beta.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, mean = 2, sd = 1)
sim.beta <- rnorm(sim.size, mean = 0, sd = 1)
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100

# Density of the simulated relative effect, truncated to +/- 80% for display.
data.frame(x = sim.beta.diff) %>%
  ggplot(aes(x = x)) +
  geom_density() +
  xlim(-80, 80) +
  labs(
    x = "Estimate with beta as % of estimate without beta",
    y = "Density",
    title = "Beta parameter prior influence"
  )

Model fit

We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling also seems to have worked well, as Rhat values are close to 1 and the sampling plots look nice.

Posterior predictive check

# Posterior predictive check (bar plot for the discrete counts).
# 'nsamples' is deprecated in current brms — the original call emitted
# "Argument 'nsamples' is deprecated. Please use argument 'ndraws' instead."
pp_check(variable_names.with(), ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

Summary

# Posterior summary: Rhat near 1 and large Bulk/Tail ESS indicate convergence.
summary(variable_names.with())
##  Family: binomial 
##   Links: mu = logit 
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session) 
##    Data: as.data.frame(data) (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.77      0.61     0.77     3.07 1.00     1525     2222
## 
## Population-Level Effects: 
##                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                  1.50      0.51     0.58     2.58 1.00     1915
## high_debt_versionfalse     2.39      0.58     1.31     3.55 1.00     3669
##                        Tail_ESS
## Intercept                  2471
## high_debt_versionfalse     2925
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Sampling plots

# Trace and density plots for all parameters; ask = FALSE avoids the
# interactive prompt between plot pages.
plot(variable_names.with(), ask = FALSE)

Model predictor extensions

# Default dirichlet(2) prior for the simplex parameters (brms class "simo")
# of the monotonic education_level predictor used in the extensions below.
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")

One variable

# PSIS-LOO comparison of the base model against each single-predictor
# extension. NOTE: the argument order determines the model labels in the
# loo output, so the calls are kept in this order deliberately.
loo_result <- loo(
  # Benchmark model(s)
  variable_names.with(),
  
  # New model(s)
  variable_names.with("work_domain"),
  variable_names.with("work_experience_programming.s"),
  variable_names.with("work_experience_java.s"),
  variable_names.with("education_field"),
  variable_names.with("mo(education_level)", edlvl_prior),
  variable_names.with("workplace_peer_review"),
  variable_names.with("workplace_td_tracking"),
  variable_names.with("workplace_pair_programming"),
  variable_names.with("workplace_coding_standards"),
  variable_names.with("scenario"),
  variable_names.with("group")
)

Comparison

loo_result[2]
## $diffs
##                                                         elpd_diff se_diff
## variable_names.with()                                    0.0       0.0   
## variable_names.with("work_experience_java.s")           -0.2       0.4   
## variable_names.with("scenario")                         -0.5       1.2   
## variable_names.with("workplace_coding_standards")       -0.9       0.6   
## variable_names.with("workplace_pair_programming")       -1.2       0.4   
## variable_names.with("work_experience_programming.s")    -1.3       0.5   
## variable_names.with("workplace_td_tracking")            -1.4       0.4   
## variable_names.with("education_field")                  -1.4       1.2   
## variable_names.with("work_domain")                      -1.6       0.6   
## variable_names.with("workplace_peer_review")            -1.8       0.6   
## variable_names.with("group")                            -2.1       0.7   
## variable_names.with("mo(education_level)", edlvl_prior) -2.1       0.9

Diagnostics

loo_result[1]
## $loos
## $loos$`variable_names.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -35.4  5.9
## p_loo        13.9  2.6
## looic        70.8 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   1378      
##  (0.5, 0.7]   (ok)        9    20.5%   284       
##    (0.7, 1]   (bad)      11    25.0%   73        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("work_domain")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -37.0  6.3
## p_loo        15.9  3.2
## looic        73.9 12.7
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   898       
##  (0.5, 0.7]   (ok)       11    25.0%   132       
##    (0.7, 1]   (bad)       8    18.2%   66        
##    (1, Inf)   (very bad)  1     2.3%   36        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("work_experience_programming.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.7  6.2
## p_loo        15.3  2.9
## looic        73.4 12.3
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1228      
##  (0.5, 0.7]   (ok)       12    27.3%   181       
##    (0.7, 1]   (bad)      11    25.0%   66        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("work_experience_java.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -35.6  5.9
## p_loo        14.4  2.6
## looic        71.2 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     20    45.5%   1289      
##  (0.5, 0.7]   (ok)       17    38.6%   219       
##    (0.7, 1]   (bad)       7    15.9%   55        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("education_field")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.8  6.1
## p_loo        13.7  2.6
## looic        73.7 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   649       
##  (0.5, 0.7]   (ok)       10    22.7%   252       
##    (0.7, 1]   (bad)      10    22.7%   53        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("mo(education_level)", edlvl_prior)`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -37.5  6.5
## p_loo        16.1  3.2
## looic        75.0 13.0
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     22    50.0%   863       
##  (0.5, 0.7]   (ok)        9    20.5%   300       
##    (0.7, 1]   (bad)      12    27.3%   34        
##    (1, Inf)   (very bad)  1     2.3%   9         
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("workplace_peer_review")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -37.2  6.3
## p_loo        15.7  3.1
## looic        74.3 12.7
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   1283      
##  (0.5, 0.7]   (ok)        7    15.9%   222       
##    (0.7, 1]   (bad)      12    27.3%   43        
##    (1, Inf)   (very bad)  1     2.3%   57        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("workplace_td_tracking")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.8  6.2
## p_loo        15.3  3.0
## looic        73.5 12.5
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     22    50.0%   771       
##  (0.5, 0.7]   (ok)       12    27.3%   177       
##    (0.7, 1]   (bad)      10    22.7%   34        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("workplace_pair_programming")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.5  6.2
## p_loo        15.1  2.9
## looic        73.1 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     22    50.0%   1589      
##  (0.5, 0.7]   (ok)       13    29.5%   209       
##    (0.7, 1]   (bad)       7    15.9%   65        
##    (1, Inf)   (very bad)  2     4.5%   43        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("workplace_coding_standards")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.3  6.2
## p_loo        15.1  3.0
## looic        72.6 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1266      
##  (0.5, 0.7]   (ok)       13    29.5%   323       
##    (0.7, 1]   (bad)       8    18.2%   88        
##    (1, Inf)   (very bad)  2     4.5%   29        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("scenario")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -35.9  6.0
## p_loo        14.9  2.8
## looic        71.7 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1255      
##  (0.5, 0.7]   (ok)       14    31.8%   305       
##    (0.7, 1]   (bad)       9    20.5%   35        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("group")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -37.5  6.4
## p_loo        16.3  3.1
## looic        74.9 12.7
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1319      
##  (0.5, 0.7]   (ok)       10    22.7%   276       
##    (0.7, 1]   (bad)      12    27.3%   30        
##    (1, Inf)   (very bad)  1     2.3%   63        
## See help('pareto-k-diagnostic') for details.

Two variables

# PSIS-LOO comparison of the best single-predictor models against all
# pairwise combinations of those predictors. Argument order determines the
# model labels in the loo output, so the call order is kept deliberately.
loo_result <- loo(
  # Benchmark model(s)
  variable_names.with(),
  variable_names.with("work_experience_programming.s"),
  variable_names.with("education_field"),
  variable_names.with("work_experience_java.s"),
  variable_names.with("workplace_pair_programming"),
  
  # New model(s)
  variable_names.with(c("work_experience_programming.s", "education_field")),
  variable_names.with(c("work_experience_programming.s", "work_experience_java.s")),
  variable_names.with(c("work_experience_programming.s", "workplace_pair_programming")),
  
  variable_names.with(c("education_field", "work_experience_java.s")),
  variable_names.with(c("education_field", "workplace_pair_programming")),
  
  variable_names.with(c("work_experience_java.s", "workplace_pair_programming"))
)

Comparison

loo_result[2]
## $diffs
##                                                                                       elpd_diff
## variable_names.with()                                                                  0.0     
## variable_names.with("work_experience_java.s")                                         -0.2     
## variable_names.with(c("work_experience_programming.s", "education_field"))            -0.8     
## variable_names.with("workplace_pair_programming")                                     -1.2     
## variable_names.with("work_experience_programming.s")                                  -1.3     
## variable_names.with(c("education_field", "work_experience_java.s"))                   -1.4     
## variable_names.with("education_field")                                                -1.4     
## variable_names.with(c("education_field", "workplace_pair_programming"))               -1.5     
## variable_names.with(c("work_experience_programming.s", "workplace_pair_programming")) -1.9     
## variable_names.with(c("work_experience_java.s", "workplace_pair_programming"))        -1.9     
## variable_names.with(c("work_experience_programming.s", "work_experience_java.s"))     -1.9     
##                                                                                       se_diff
## variable_names.with()                                                                  0.0   
## variable_names.with("work_experience_java.s")                                          0.4   
## variable_names.with(c("work_experience_programming.s", "education_field"))             1.1   
## variable_names.with("workplace_pair_programming")                                      0.4   
## variable_names.with("work_experience_programming.s")                                   0.5   
## variable_names.with(c("education_field", "work_experience_java.s"))                    1.2   
## variable_names.with("education_field")                                                 1.2   
## variable_names.with(c("education_field", "workplace_pair_programming"))                1.2   
## variable_names.with(c("work_experience_programming.s", "workplace_pair_programming"))  0.6   
## variable_names.with(c("work_experience_java.s", "workplace_pair_programming"))         0.7   
## variable_names.with(c("work_experience_programming.s", "work_experience_java.s"))      0.5

Diagnostics

loo_result[1]
## $loos
## $loos$`variable_names.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -35.4  5.9
## p_loo        13.9  2.6
## looic        70.8 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   1378      
##  (0.5, 0.7]   (ok)        9    20.5%   284       
##    (0.7, 1]   (bad)      11    25.0%   73        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("work_experience_programming.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.7  6.2
## p_loo        15.3  2.9
## looic        73.4 12.3
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1228      
##  (0.5, 0.7]   (ok)       12    27.3%   181       
##    (0.7, 1]   (bad)      11    25.0%   66        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("education_field")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.8  6.1
## p_loo        13.7  2.6
## looic        73.7 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   649       
##  (0.5, 0.7]   (ok)       10    22.7%   252       
##    (0.7, 1]   (bad)      10    22.7%   53        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("work_experience_java.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -35.6  5.9
## p_loo        14.4  2.6
## looic        71.2 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     20    45.5%   1289      
##  (0.5, 0.7]   (ok)       17    38.6%   219       
##    (0.7, 1]   (bad)       7    15.9%   55        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("workplace_pair_programming")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.5  6.2
## p_loo        15.1  2.9
## looic        73.1 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     22    50.0%   1589      
##  (0.5, 0.7]   (ok)       13    29.5%   209       
##    (0.7, 1]   (bad)       7    15.9%   65        
##    (1, Inf)   (very bad)  2     4.5%   43        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("work_experience_programming.s", "education_field"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.2  6.0
## p_loo        14.0  2.5
## looic        72.5 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   711       
##  (0.5, 0.7]   (ok)       12    27.3%   159       
##    (0.7, 1]   (bad)       8    18.2%   40        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("work_experience_programming.s", "work_experience_java.s"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -37.3  6.3
## p_loo        16.1  3.0
## looic        74.6 12.6
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1341      
##  (0.5, 0.7]   (ok)       12    27.3%   159       
##    (0.7, 1]   (bad)       9    20.5%   27        
##    (1, Inf)   (very bad)  2     4.5%   31        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("work_experience_programming.s", "workplace_pair_programming"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -37.3  6.4
## p_loo        16.2  3.1
## looic        74.5 12.8
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1192      
##  (0.5, 0.7]   (ok)       13    29.5%   427       
##    (0.7, 1]   (bad)       9    20.5%   23        
##    (1, Inf)   (very bad)  1     2.3%   24        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("education_field", "work_experience_java.s"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.8  6.2
## p_loo        14.4  2.7
## looic        73.6 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     25    56.8%   405       
##  (0.5, 0.7]   (ok)        9    20.5%   226       
##    (0.7, 1]   (bad)       8    18.2%   46        
##    (1, Inf)   (very bad)  2     4.5%   17        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("education_field", "workplace_pair_programming"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.9  6.2
## p_loo        14.4  2.8
## looic        73.7 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   834       
##  (0.5, 0.7]   (ok)        6    13.6%   545       
##    (0.7, 1]   (bad)      12    27.3%   67        
##    (1, Inf)   (very bad)  2     4.5%   19        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("work_experience_java.s", "workplace_pair_programming"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -37.3  6.5
## p_loo        16.2  3.2
## looic        74.6 13.0
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1343      
##  (0.5, 0.7]   (ok)       11    25.0%   566       
##    (0.7, 1]   (bad)      11    25.0%   24        
##    (1, Inf)   (very bad)  1     2.3%   57        
## See help('pareto-k-diagnostic') for details.

Three variables

# PSIS-LOO comparison extending the best one- and two-predictor models with
# three-predictor combinations. Argument order determines the model labels
# in the loo output, so the call order is kept deliberately.
loo_result <- loo(
  # Benchmark model(s)
  variable_names.with(),
  
  variable_names.with("work_experience_programming.s"),
  variable_names.with("education_field"),
  variable_names.with("work_experience_java.s"),
  variable_names.with("workplace_pair_programming"),
  
  variable_names.with(c("work_experience_programming.s", "education_field")),
  variable_names.with(c("education_field", "work_experience_java.s")),
  variable_names.with(c("education_field", "workplace_pair_programming")),
  
  # New model(s)
  variable_names.with(c("education_field", "work_experience_programming.s", "work_experience_java.s")),
  variable_names.with(c("education_field", "work_experience_programming.s", "workplace_pair_programming")),
  variable_names.with(c("education_field", "work_experience_java.s", "workplace_pair_programming"))
)

Comparison

loo_result[2]
## $diffs
##                                                                                                              elpd_diff
## variable_names.with()                                                                                         0.0     
## variable_names.with("work_experience_java.s")                                                                -0.2     
## variable_names.with(c("education_field", "work_experience_programming.s",     "work_experience_java.s"))     -0.8     
## variable_names.with(c("work_experience_programming.s", "education_field"))                                   -0.8     
## variable_names.with(c("education_field", "work_experience_java.s",     "workplace_pair_programming"))        -1.1     
## variable_names.with("workplace_pair_programming")                                                            -1.2     
## variable_names.with("work_experience_programming.s")                                                         -1.3     
## variable_names.with(c("education_field", "work_experience_java.s"))                                          -1.4     
## variable_names.with(c("education_field", "work_experience_programming.s",     "workplace_pair_programming")) -1.4     
## variable_names.with("education_field")                                                                       -1.4     
## variable_names.with(c("education_field", "workplace_pair_programming"))                                      -1.5     
##                                                                                                              se_diff
## variable_names.with()                                                                                         0.0   
## variable_names.with("work_experience_java.s")                                                                 0.4   
## variable_names.with(c("education_field", "work_experience_programming.s",     "work_experience_java.s"))      1.1   
## variable_names.with(c("work_experience_programming.s", "education_field"))                                    1.1   
## variable_names.with(c("education_field", "work_experience_java.s",     "workplace_pair_programming"))         1.0   
## variable_names.with("workplace_pair_programming")                                                             0.4   
## variable_names.with("work_experience_programming.s")                                                          0.5   
## variable_names.with(c("education_field", "work_experience_java.s"))                                           1.2   
## variable_names.with(c("education_field", "work_experience_programming.s",     "workplace_pair_programming"))  1.0   
## variable_names.with("education_field")                                                                        1.2   
## variable_names.with(c("education_field", "workplace_pair_programming"))                                       1.2

Diagnostics

loo_result[1]
## $loos
## $loos$`variable_names.with()`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -35.4  5.9
## p_loo        13.9  2.6
## looic        70.8 11.9
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   1378      
##  (0.5, 0.7]   (ok)        9    20.5%   284       
##    (0.7, 1]   (bad)      11    25.0%   73        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("work_experience_programming.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.7  6.2
## p_loo        15.3  2.9
## looic        73.4 12.3
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   1228      
##  (0.5, 0.7]   (ok)       12    27.3%   181       
##    (0.7, 1]   (bad)      11    25.0%   66        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("education_field")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.8  6.1
## p_loo        13.7  2.6
## looic        73.7 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   649       
##  (0.5, 0.7]   (ok)       10    22.7%   252       
##    (0.7, 1]   (bad)      10    22.7%   53        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("work_experience_java.s")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -35.6  5.9
## p_loo        14.4  2.6
## looic        71.2 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     20    45.5%   1289      
##  (0.5, 0.7]   (ok)       17    38.6%   219       
##    (0.7, 1]   (bad)       7    15.9%   55        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with("workplace_pair_programming")`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.5  6.2
## p_loo        15.1  2.9
## looic        73.1 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     22    50.0%   1589      
##  (0.5, 0.7]   (ok)       13    29.5%   209       
##    (0.7, 1]   (bad)       7    15.9%   65        
##    (1, Inf)   (very bad)  2     4.5%   43        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("work_experience_programming.s", "education_field"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.2  6.0
## p_loo        14.0  2.5
## looic        72.5 12.0
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   711       
##  (0.5, 0.7]   (ok)       12    27.3%   159       
##    (0.7, 1]   (bad)       8    18.2%   40        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("education_field", "work_experience_java.s"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.8  6.2
## p_loo        14.4  2.7
## looic        73.6 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     25    56.8%   405       
##  (0.5, 0.7]   (ok)        9    20.5%   226       
##    (0.7, 1]   (bad)       8    18.2%   46        
##    (1, Inf)   (very bad)  2     4.5%   17        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("education_field", "workplace_pair_programming"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.9  6.2
## p_loo        14.4  2.8
## looic        73.7 12.4
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    54.5%   834       
##  (0.5, 0.7]   (ok)        6    13.6%   545       
##    (0.7, 1]   (bad)      12    27.3%   67        
##    (1, Inf)   (very bad)  2     4.5%   19        
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("education_field", "work_experience_programming.s",     "work_experience_java.s"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.2  5.9
## p_loo        14.1  2.5
## looic        72.3 11.8
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     21    47.7%   746       
##  (0.5, 0.7]   (ok)       11    25.0%   221       
##    (0.7, 1]   (bad)      12    27.3%   80        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("education_field", "work_experience_programming.s",     "workplace_pair_programming"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.8  6.1
## p_loo        14.8  2.7
## looic        73.6 12.2
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     22    50.0%   850       
##  (0.5, 0.7]   (ok)        9    20.5%   300       
##    (0.7, 1]   (bad)      13    29.5%   63        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $loos$`variable_names.with(c("education_field", "work_experience_java.s",     "workplace_pair_programming"))`
## 
## Computed from 4000 by 44 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -36.5  6.1
## p_loo        14.6  2.6
## looic        73.0 12.1
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     20    45.5%   829       
##  (0.5, 0.7]   (ok)       17    38.6%   90        
##    (0.7, 1]   (bad)       6    13.6%   120       
##    (1, Inf)   (very bad)  1     2.3%   23        
## See help('pareto-k-diagnostic') for details.

Candidate models

We pick some of our top performing models as candidates and inspect them closer.

The candidate models are named and listed in order of complexity.

variable_names0

We select the simplest model as a baseline.

# Baseline binomial model: count of good new variable names out of all new
# names, with a fixed effect for debt version and a varying intercept per
# participant session. Kept byte-identical to the cached fit on disk.
variable_names0 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)",
  # Weakly informative priors from the prior-selection section above.
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  # Raised adapt_delta to reduce divergent transitions during sampling.
  control = list(adapt_delta = 0.95),
  # Cache the fit; refit only when model code or data changes.
  file = "fits/variable_names0",
  file_refit = "on_change",
  seed = 20210421
)

Summary

# Posterior summaries of population- and group-level parameters, plus Rhat/ESS.
summary(variable_names0)
##  Family: binomial 
##   Links: mu = logit 
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session) 
##    Data: d.both_completed (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.77      0.61     0.77     3.07 1.00     1525     2222
## 
## Population-Level Effects: 
##                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                  1.50      0.51     0.58     2.58 1.00     1915
## high_debt_versionfalse     2.39      0.58     1.31     3.55 1.00     3669
##                        Tail_ESS
## Intercept                  2471
## high_debt_versionfalse     2925
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

# Per-session deviations from the population-level intercept.
ranef(variable_names0)
## $session
## , , Intercept
## 
##                            Estimate Est.Error       Q2.5       Q97.5
## 6033d69a5af2c702367b3a95  1.0000799 1.4717340 -1.4249299  4.38602233
## 6033d90a5af2c702367b3a96  1.2830673 1.4167111 -1.0090496  4.60563549
## 6034fc165af2c702367b3a98 -1.3713791 0.6442554 -2.6740529 -0.16428921
## 603500725af2c702367b3a99 -1.5035716 1.0680323 -3.6586994  0.49912144
## 603f97625af2c702367b3a9d  1.2869359 1.3758884 -0.9610562  4.49818294
## 603fd5d95af2c702367b3a9e -1.4906993 1.0791093 -3.7245818  0.49205421
## 60409b7b5af2c702367b3a9f  1.2979664 1.4753110 -0.9937849  4.89826741
## 604b82b5a7718fbed181b336 -0.8674641 0.8325170 -2.4968212  0.75509359
## 6050c1bf856f36729d2e5218 -1.8506433 1.0168974 -3.9173681  0.04036338
## 6050e1e7856f36729d2e5219  1.2760302 1.4735299 -1.0872517  4.73310028
## 6055fdc6856f36729d2e521b  1.0435083 1.4703749 -1.4031487  4.45004442
## 60589862856f36729d2e521f  1.0341793 1.4465488 -1.3165140  4.39880449
## 605afa3a856f36729d2e5222 -1.6032954 1.1060685 -3.8799500  0.45667122
## 605c8bc6856f36729d2e5223 -0.9323015 0.9652305 -2.9612146  0.91170646
## 605f3f2d856f36729d2e5224  1.0043917 1.4655341 -1.3900629  4.49891807
## 605f46c3856f36729d2e5225 -1.6307992 0.8960112 -3.4896191 -0.01956279
## 60605337856f36729d2e5226  0.9789789 1.4965974 -1.4564412  4.43908547
## 60609ae6856f36729d2e5228  1.2544406 1.3937102 -0.9511694  4.40085494
## 6061ce91856f36729d2e522e  1.2923945 1.4437943 -0.9756230  4.73794292
## 6061f106856f36729d2e5231 -1.5006643 1.0700480 -3.6531177  0.50650024
## 6068ea9f856f36729d2e523e  1.5956785 1.3876871 -0.5266383  4.82002737
## 6075ab05856f36729d2e5247  1.2826964 1.4061109 -0.9736627  4.50085237

Sampling plots

# Trace and density plots for visual convergence checking.
plot(variable_names0, ask = FALSE)

Posterior predictive check

# Posterior predictive check against observed counts.
# `ndraws` replaces the deprecated `nsamples` argument (see warning below).
pp_check(variable_names0, ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

variable_names1

We select the best performing model with one variable.

# Candidate model 1: baseline plus standardized professional programming
# experience as an additional population-level predictor.
variable_names1 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
  # Same priors as the baseline model for comparability.
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  control = list(adapt_delta = 0.95),
  # Cache the fit; refit only when model code or data changes.
  file = "fits/variable_names1",
  file_refit = "on_change",
  seed = 20210421
)

Summary

# Posterior summaries of population- and group-level parameters, plus Rhat/ESS.
summary(variable_names1)
##  Family: binomial 
##   Links: mu = logit 
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session) 
##    Data: d.both_completed (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.82      0.64     0.77     3.26 1.00     1384     1876
## 
## Population-Level Effects: 
##                               Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept                         1.54      0.50     0.65     2.62 1.00
## high_debt_versionfalse            2.40      0.57     1.35     3.54 1.00
## work_experience_programming.s     0.08      0.51    -0.90     1.12 1.00
##                               Bulk_ESS Tail_ESS
## Intercept                         1908     2527
## high_debt_versionfalse            3967     3096
## work_experience_programming.s     2447     2733
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

# Per-session deviations from the population-level intercept.
ranef(variable_names1)
## $session
## , , Intercept
## 
##                            Estimate Est.Error       Q2.5       Q97.5
## 6033d69a5af2c702367b3a95  1.1031234 1.6092023 -1.5010741  4.88722125
## 6033d90a5af2c702367b3a96  1.3211019 1.4883279 -1.1256331  4.83309703
## 6034fc165af2c702367b3a98 -1.3776764 0.6695583 -2.7848317 -0.15632122
## 603500725af2c702367b3a99 -1.4940279 1.1395434 -3.8188202  0.61156378
## 603f97625af2c702367b3a9d  1.3612474 1.4893876 -1.0939617  4.79397809
## 603fd5d95af2c702367b3a9e -1.5043894 1.1393353 -3.8883013  0.62439515
## 60409b7b5af2c702367b3a9f  1.3185463 1.5341952 -1.0779879  4.96545678
## 604b82b5a7718fbed181b336 -0.8993642 0.8653438 -2.6972138  0.71479695
## 6050c1bf856f36729d2e5218 -1.9130371 1.0296778 -3.9649086  0.01207538
## 6050e1e7856f36729d2e5219  1.2968818 1.4772775 -0.9941513  4.90329262
## 6055fdc6856f36729d2e521b  1.0459831 1.5212373 -1.3501228  4.54656588
## 60589862856f36729d2e521f  1.0676095 1.6168013 -1.7547637  4.74331689
## 605afa3a856f36729d2e5222 -1.7170475 1.2906898 -4.4979599  0.62339131
## 605c8bc6856f36729d2e5223 -0.9845553 0.9799971 -2.9604091  0.87447565
## 605f3f2d856f36729d2e5224  1.0644148 1.6725635 -1.9382188  4.84592087
## 605f46c3856f36729d2e5225 -1.6399381 0.9180840 -3.4888899  0.05533314
## 60605337856f36729d2e5226  1.0720541 1.5136966 -1.4002236  4.61744766
## 60609ae6856f36729d2e5228  1.3422010 1.5106083 -1.0652152  4.94293052
## 6061ce91856f36729d2e522e  1.3056672 1.4668265 -1.0676987  4.66264319
## 6061f106856f36729d2e5231 -1.5220859 1.0887773 -3.7503493  0.45953600
## 6068ea9f856f36729d2e523e  1.6318117 1.3575185 -0.4803860  4.93170187
## 6075ab05856f36729d2e5247  1.3257292 1.4667113 -0.9803336  4.81086272

Sampling plots

# Trace and density plots for visual convergence checking.
plot(variable_names1, ask = FALSE)

Posterior predictive check

# Posterior predictive check; `ndraws` replaces the deprecated `nsamples`.
pp_check(variable_names1, ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

variable_names2

We select the second best performing model with one variable.

# Candidate model 2: baseline plus education field (categorical) as an
# additional population-level predictor.
variable_names2 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + education_field + (1 | session)",
  # Same priors as the baseline model for comparability.
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  control = list(adapt_delta = 0.95),
  # Cache the fit; refit only when model code or data changes.
  file = "fits/variable_names2",
  file_refit = "on_change",
  seed = 20210421
)

Summary

# Posterior summaries of population- and group-level parameters, plus Rhat/ESS.
summary(variable_names2)
##  Family: binomial 
##   Links: mu = logit 
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + education_field + (1 | session) 
##    Data: d.both_completed (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.33      0.64     0.20     2.76 1.00      914     1147
## 
## Population-Level Effects: 
##                                    Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept                              2.17      0.68     0.89     3.59 1.00
## high_debt_versionfalse                 2.31      0.58     1.23     3.47 1.00
## education_fieldInteractionDesign       0.32      0.94    -1.53     2.20 1.00
## education_fieldNone                    0.43      0.91    -1.35     2.22 1.00
## education_fieldSoftwareEngineering    -1.14      0.71    -2.52     0.32 1.00
##                                    Bulk_ESS Tail_ESS
## Intercept                              3806     2825
## high_debt_versionfalse                 4530     3415
## education_fieldInteractionDesign       5226     2900
## education_fieldNone                    4677     3078
## education_fieldSoftwareEngineering     3328     2885
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

# Per-session deviations from the population-level intercept.
ranef(variable_names2)
## $session
## , , Intercept
## 
##                            Estimate Est.Error       Q2.5     Q97.5
## 6033d69a5af2c702367b3a95  0.5029744 1.2364115 -1.6552635 3.5077903
## 6033d90a5af2c702367b3a96  1.0594155 1.2227723 -0.8054366 3.9530083
## 6034fc165af2c702367b3a98 -0.8546110 0.6643820 -2.2688651 0.2811874
## 603500725af2c702367b3a99 -0.9451153 1.0243075 -3.1437653 0.8353074
## 603f97625af2c702367b3a9d  0.7346507 1.2362493 -1.2796284 3.6500677
## 603fd5d95af2c702367b3a9e -0.9252619 1.0203599 -3.1499793 0.7877334
## 60409b7b5af2c702367b3a9f  1.0368972 1.1594958 -0.7417955 3.7367863
## 604b82b5a7718fbed181b336 -0.4509765 0.7532009 -2.0731165 0.9340769
## 6050c1bf856f36729d2e5218 -1.2181052 0.9913146 -3.4186375 0.3957320
## 6050e1e7856f36729d2e5219  1.0742973 1.2109695 -0.7247435 3.9284067
## 6055fdc6856f36729d2e521b  0.4927239 1.2201910 -1.5960730 3.3373333
## 60589862856f36729d2e521f  0.8886165 1.2358535 -0.9666762 3.9302450
## 605afa3a856f36729d2e5222 -1.5420305 1.2071001 -4.1071386 0.3595458
## 605c8bc6856f36729d2e5223 -0.5038544 0.8761894 -2.4050685 1.0634125
## 605f3f2d856f36729d2e5224  0.5030682 1.2489343 -1.6143905 3.4029863
## 605f46c3856f36729d2e5225 -1.0441924 0.8724193 -2.9372607 0.4149143
## 60605337856f36729d2e5226  0.8086655 1.1949834 -1.1258234 3.7548630
## 60609ae6856f36729d2e5228  0.6911081 1.2673451 -1.3954990 3.7150273
## 6061ce91856f36729d2e522e  1.0492630 1.2569093 -0.7562350 4.0135514
## 6061f106856f36729d2e5231 -0.9398848 1.0120709 -3.2163114 0.7462887
## 6068ea9f856f36729d2e523e  0.8087261 1.3070545 -1.3851819 3.8859009
## 6075ab05856f36729d2e5247  0.6142393 1.2893548 -1.6341738 3.7431512

Sampling plots

# Trace and density plots for visual convergence checking.
plot(variable_names2, ask = FALSE)

Posterior predictive check

# Posterior predictive check; `ndraws` replaces the deprecated `nsamples`.
pp_check(variable_names2, ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

variable_names3

We select the best performing model with three variables.

# Candidate model 3: baseline plus three additional population-level
# predictors (programming experience, education field, pair-programming use).
variable_names3 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + education_field + workplace_pair_programming + (1 | session)",
  # Same priors as the baseline model for comparability.
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  control = list(adapt_delta = 0.95),
  # Cache the fit; refit only when model code or data changes.
  file = "fits/variable_names3",
  file_refit = "on_change",
  seed = 20210421
)

Summary

# Posterior summaries of population- and group-level parameters, plus Rhat/ESS.
summary(variable_names3)
##  Family: binomial 
##   Links: mu = logit 
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + education_field + workplace_pair_programming + (1 | session) 
##    Data: d.both_completed (Number of observations: 44) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 22) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.58      0.65     0.43     3.00 1.00      946      811
## 
## Population-Level Effects: 
##                                    Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept                              2.13      0.92     0.40     3.96 1.00
## high_debt_versionfalse                 2.37      0.57     1.30     3.53 1.00
## work_experience_programming.s         -0.05      0.49    -0.99     0.94 1.00
## education_fieldInteractionDesign       0.26      0.93    -1.53     2.09 1.00
## education_fieldNone                    0.36      0.97    -1.51     2.29 1.00
## education_fieldSoftwareEngineering    -1.09      0.77    -2.61     0.44 1.00
## workplace_pair_programmingfalse        0.14      0.73    -1.30     1.58 1.00
##                                    Bulk_ESS Tail_ESS
## Intercept                              3127     2773
## high_debt_versionfalse                 4341     2742
## work_experience_programming.s          3025     2771
## education_fieldInteractionDesign       4405     2837
## education_fieldNone                    4196     2815
## education_fieldSoftwareEngineering     3576     2898
## workplace_pair_programmingfalse        2907     2775
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Random effects

# Per-session deviations from the population-level intercept.
ranef(variable_names3)
## $session
## , , Intercept
## 
##                            Estimate Est.Error       Q2.5     Q97.5
## 6033d69a5af2c702367b3a95  0.6363314 1.4537037 -1.8901595 4.0569877
## 6033d90a5af2c702367b3a96  1.2054050 1.3376124 -0.8682837 4.4280002
## 6034fc165af2c702367b3a98 -1.0607158 0.7078869 -2.5822087 0.1605248
## 603500725af2c702367b3a99 -1.2055639 1.1183165 -3.5212348 0.8550970
## 603f97625af2c702367b3a9d  0.8398589 1.4000264 -1.4152270 4.1459590
## 603fd5d95af2c702367b3a9e -1.0987898 1.0909667 -3.3832731 0.8255944
## 60409b7b5af2c702367b3a9f  1.2224452 1.3651698 -0.9218828 4.5601056
## 604b82b5a7718fbed181b336 -0.4999166 0.9099358 -2.4011452 1.2454805
## 6050c1bf856f36729d2e5218 -1.4947015 1.0458592 -3.6946791 0.3657109
## 6050e1e7856f36729d2e5219  1.3025260 1.3476800 -0.8504121 4.6093643
## 6055fdc6856f36729d2e521b  0.6933774 1.4605823 -1.8056431 4.0166018
## 60589862856f36729d2e521f  1.0848393 1.4340520 -1.4103697 4.4335646
## 605afa3a856f36729d2e5222 -1.8232926 1.3344686 -4.6512117 0.4597669
## 605c8bc6856f36729d2e5223 -0.6723677 0.9676454 -2.7036804 1.1869452
## 605f3f2d856f36729d2e5224  0.7847307 1.5768454 -1.9692426 4.4745793
## 605f46c3856f36729d2e5225 -1.1897052 0.9813016 -3.3301871 0.5460835
## 60605337856f36729d2e5226  1.0396897 1.3837433 -1.1998376 4.1376518
## 60609ae6856f36729d2e5228  0.8164886 1.4388517 -1.5158452 4.1895693
## 6061ce91856f36729d2e522e  1.2383765 1.3380365 -0.9201613 4.2473757
## 6061f106856f36729d2e5231 -1.2166932 1.1150584 -3.5901000 0.7694037
## 6068ea9f856f36729d2e523e  1.0031504 1.4394572 -1.4564339 4.3550277
## 6075ab05856f36729d2e5247  0.8201384 1.5085935 -1.7603424 4.1963944

Sampling plots

# Trace and density plots for visual convergence checking.
plot(variable_names3, ask = FALSE)

Posterior predictive check

# Posterior predictive check; `ndraws` replaces the deprecated `nsamples`.
pp_check(variable_names3, ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

Final model

All candidate models look reasonable; none is significantly better than the others. We will proceed with the simplest model: variable_names0

Variations

We will try a few different variations of the selected candidate model.

All data points

Some participants only completed one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can, and will therefore try to fit the model with the complete dataset.

# Variation of the selected baseline model fitted on the complete dataset
# (d.completed), including participants who finished only one scenario.
variable_names0.all <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)",
  # Same priors as the candidate models for comparability.
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = as.data.frame(d.completed),
  control = list(adapt_delta = 0.95),
  # Cache the fit; refit only when model code or data changes.
  file = "fits/variable_names0.all",
  file_refit = "on_change",
seed = 20210421
)
Summary
# Posterior summaries of population- and group-level parameters, plus Rhat/ESS.
summary(variable_names0.all)
##  Family: binomial 
##   Links: mu = logit 
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session) 
##    Data: as.data.frame(d.completed) (Number of observations: 51) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 29) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.65      0.56     0.69     2.90 1.00     1102     1647
## 
## Population-Level Effects: 
##                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                  1.48      0.47     0.63     2.50 1.00     1794
## high_debt_versionfalse     2.49      0.55     1.45     3.59 1.00     3314
##                        Tail_ESS
## Intercept                  2636
## high_debt_versionfalse     2961
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Random effects
# Per-session deviations from the population-level intercept.
ranef(variable_names0.all)
## $session
## , , Intercept
## 
##                            Estimate Est.Error       Q2.5       Q97.5
## 6033c6fc5af2c702367b3a93 -1.3776566 1.1413406 -3.7992165  0.64779639
## 6033d69a5af2c702367b3a95  0.9097195 1.4144715 -1.4227953  4.23322120
## 6033d90a5af2c702367b3a96  1.1598831 1.3425903 -0.9873769  4.34080133
## 6034fc165af2c702367b3a98 -1.3392593 0.6225870 -2.6130898 -0.19603135
## 603500725af2c702367b3a99 -1.4402577 1.0546821 -3.5999002  0.54780772
## 603f84f15af2c702367b3a9b  0.2313512 1.5684729 -2.6662670  3.61414177
## 603f97625af2c702367b3a9d  1.1858735 1.3560270 -1.0084036  4.39978300
## 603fd5d95af2c702367b3a9e -1.4474620 1.0968151 -3.7256503  0.58847012
## 60409b7b5af2c702367b3a9f  1.1565359 1.4071579 -1.0260407  4.53331329
## 604b82b5a7718fbed181b336 -0.8443277 0.8300019 -2.4754107  0.77475369
## 604f1239a7718fbed181b33f  0.7594788 1.4135739 -1.6981436  3.95930996
## 6050c1bf856f36729d2e5218 -1.8092374 1.0156075 -3.9074462  0.04796727
## 6050e1e7856f36729d2e5219  1.1573462 1.3540281 -1.0302610  4.28602751
## 6055fdc6856f36729d2e521b  0.9361034 1.4810585 -1.5495298  4.45320670
## 60579f2a856f36729d2e521e  0.2808622 1.5954035 -2.7585403  3.68255553
## 60589862856f36729d2e521f  0.9414627 1.3643197 -1.3907914  4.05137246
## 605a30a7856f36729d2e5221  0.1588349 1.6278995 -2.9914919  3.60710709
## 605afa3a856f36729d2e5222 -1.5463513 1.0953259 -3.7661089  0.44567543
## 605c8bc6856f36729d2e5223 -0.8965700 0.9686405 -2.8877004  0.95363856
## 605f3f2d856f36729d2e5224  0.9230826 1.4043040 -1.4342231  4.09352635
## 605f46c3856f36729d2e5225 -1.5715074 0.8658758 -3.3364812  0.02275444
## 60605337856f36729d2e5226  0.8750994 1.3693922 -1.3817921  4.11633896
## 60609ae6856f36729d2e5228  1.1725713 1.3686358 -1.0711924  4.36775662
## 6061ce91856f36729d2e522e  1.1907754 1.3484896 -0.9558661  4.30372540
## 6061f106856f36729d2e5231 -1.4199720 1.0647563 -3.5859166  0.51268974
## 60672faa856f36729d2e523c  0.2488756 1.5520603 -2.5815375  3.58046129
## 6068ea9f856f36729d2e523e  1.4908078 1.2919320 -0.5318217  4.37492466
## 606db69d856f36729d2e5243  0.4471265 1.4869757 -2.1182614  3.73838005
## 6075ab05856f36729d2e5247  1.1868732 1.3584384 -0.9802006  4.33205490
Sampling plots
# Trace and density plots for visual convergence checking.
plot(variable_names0.all, ask = FALSE)

Posterior predictive check
# Posterior predictive check; `ndraws` replaces the deprecated `nsamples`.
pp_check(variable_names0.all, ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

With experience predictor

As including all data points didn’t harm the model we will create this variant with all data points as well.

This variation includes the work_experience_programming.s predictor, as it can give further insight into how experience plays a role in the effect we try to measure. This is especially important as our sample skewed towards containing less experienced developers than the population at large.

# Variation on the complete dataset that also includes standardized
# professional programming experience as a population-level predictor.
variable_names0.all.exp <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
  # Same priors as the candidate models for comparability.
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = as.data.frame(d.completed),
  control = list(adapt_delta = 0.95),
  # Cache the fit; refit only when model code or data changes.
  file = "fits/variable_names0.all.exp",
  file_refit = "on_change",
  seed = 20210421
)
Summary
# Posterior summaries of population- and group-level parameters, plus Rhat/ESS.
summary(variable_names0.all.exp)
##  Family: binomial 
##   Links: mu = logit 
## Formula: var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session) 
##    Data: as.data.frame(d.completed) (Number of observations: 51) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~session (Number of levels: 29) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.75      0.59     0.74     3.07 1.00      992     2020
## 
## Population-Level Effects: 
##                               Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept                         1.53      0.49     0.66     2.60 1.00
## high_debt_versionfalse            2.49      0.56     1.43     3.59 1.00
## work_experience_programming.s     0.16      0.48    -0.79     1.15 1.00
##                               Bulk_ESS Tail_ESS
## Intercept                         2068     2304
## high_debt_versionfalse            3841     2889
## work_experience_programming.s     2379     2514
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Random effects
# Per-session deviations from the population-level intercept.
ranef(variable_names0.all.exp)
## $session
## , , Intercept
## 
##                            Estimate Est.Error       Q2.5        Q97.5
## 6033c6fc5af2c702367b3a93 -1.4252267 1.1674591 -3.8634649  0.749721615
## 6033d69a5af2c702367b3a95  0.9714075 1.4505388 -1.4176611  4.361301324
## 6033d90a5af2c702367b3a96  1.3223984 1.4818230 -1.0972695  4.694637545
## 6034fc165af2c702367b3a98 -1.3351255 0.6482382 -2.6631792 -0.121895378
## 603500725af2c702367b3a99 -1.4922597 1.1091510 -3.7298398  0.656671752
## 603f84f15af2c702367b3a9b  0.3391517 1.7286198 -2.7445878  4.135314679
## 603f97625af2c702367b3a9d  1.3282933 1.4236739 -0.9470063  4.684925803
## 603fd5d95af2c702367b3a9e -1.4731874 1.0772010 -3.7458528  0.532212257
## 60409b7b5af2c702367b3a9f  1.2498666 1.4159020 -1.0694834  4.573069030
## 604b82b5a7718fbed181b336 -0.8499655 0.8517210 -2.6012967  0.786460104
## 604f1239a7718fbed181b33f  0.8948890 1.4814654 -1.6011239  4.245941060
## 6050c1bf856f36729d2e5218 -1.9088551 1.0150133 -3.9761813 -0.008455228
## 6050e1e7856f36729d2e5219  1.2708150 1.4216961 -0.9923952  4.573501478
## 6055fdc6856f36729d2e521b  1.0010708 1.4960728 -1.3791554  4.482958606
## 60579f2a856f36729d2e521e  0.2717364 1.6833344 -2.8511022  3.979450731
## 60589862856f36729d2e521f  0.9413392 1.5772442 -1.8085568  4.515089346
## 605a30a7856f36729d2e5221  0.1591390 1.7764715 -3.2938164  3.955132531
## 605afa3a856f36729d2e5222 -1.8147806 1.2867818 -4.4688989  0.535180628
## 605c8bc6856f36729d2e5223 -0.9995425 0.9798865 -2.9924955  0.919953290
## 605f3f2d856f36729d2e5224  0.8885764 1.6414037 -2.1341401  4.365412141
## 605f46c3856f36729d2e5225 -1.5805128 0.9216654 -3.4943404  0.124470294
## 60605337856f36729d2e5226  1.0140176 1.4412992 -1.4171227  4.441034840
## 60609ae6856f36729d2e5228  1.2818974 1.4245724 -0.9877157  4.630680580
## 6061ce91856f36729d2e522e  1.2764264 1.4146934 -1.0036760  4.577079040
## 6061f106856f36729d2e5231 -1.4789954 1.1046920 -3.6834869  0.537556638
## 60672faa856f36729d2e523c  0.2635877 1.6395756 -2.8819881  3.824061659
## 6068ea9f856f36729d2e523e  1.5304261 1.2923150 -0.5160073  4.618306764
## 606db69d856f36729d2e5243  0.4329476 1.5888341 -2.4626955  3.950783733
## 6075ab05856f36729d2e5247  1.2669265 1.4473865 -1.0989070  4.595663531
Sampling plots
# Trace and density plots for visual convergence checking.
plot(variable_names0.all.exp, ask = FALSE)

Posterior predictive check
# Posterior predictive check; `ndraws` replaces the deprecated `nsamples`.
pp_check(variable_names0.all.exp, ndraws = 200, type = "bars")
## Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
## instead.

Loo comparison
# Compare out-of-sample predictive performance (PSIS-LOO) of the two variants.
loo(
  variable_names0.all,
  variable_names0.all.exp
)
## Output of model 'variable_names0.all':
## 
## Computed from 4000 by 51 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -39.2  6.4
## p_loo        15.5  3.0
## looic        78.5 12.9
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     24    47.1%   1204      
##  (0.5, 0.7]   (ok)       16    31.4%   165       
##    (0.7, 1]   (bad)      11    21.6%   44        
##    (1, Inf)   (very bad)  0     0.0%   <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'variable_names0.all.exp':
## 
## Computed from 4000 by 51 log-likelihood matrix
## 
##          Estimate   SE
## elpd_loo    -40.1  6.6
## p_loo        16.7  3.1
## looic        80.1 13.1
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     22    43.1%   1352      
##  (0.5, 0.7]   (ok)       18    35.3%   152       
##    (0.7, 1]   (bad)       9    17.6%   60        
##    (1, Inf)   (very bad)  2     3.9%   25        
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##                         elpd_diff se_diff
## variable_names0.all      0.0       0.0   
## variable_names0.all.exp -0.8       0.5

Final model

  • Fitting the model to all data points did not significantly damage the model, and the full dataset will be used as it is a fairer representation of reality.
  • Adding the experience predictors did not significantly damage the model and will be used as it provides useful insight.

This means that our final model, with all data points and experience predictors, is variable_names0.all.exp

Interpreting the model

To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected, we will mainly analyze the \(\beta\) parameters.

\(\beta\) parameters

# Posterior density areas for the two beta parameters of the final model.
# Removed a redundant bare scale_y_discrete() call: adding a second y scale
# replaces the first (with a ggplot2 message), so only the labeled one is kept.
mcmc_areas(
  variable_names0.all.exp, 
  pars = c("b_high_debt_versionfalse", "b_work_experience_programming.s"), 
  prob = 0.95
) + 
  scale_y_discrete(labels=c("High debt version: false", "Professional programming experience")) +
  ggtitle(
    "Beta parameters densities in variable naming model", 
    subtitle = "Shaded region marks 95% of the density. Line marks the median"
  )

Effects sizes

We start by extracting posterior samples

# Standardize raw years of programming experience to z-scores.
# `ref` is the reference sample providing mean/sd; it defaults to the observed
# experience column, so all existing calls behave exactly as before.
scale_programming_experience <- function(x, ref = d.completed$work_experience_programming) {
  (x - mean(ref)) / sd(ref)
}
# Inverse transform: map standardized experience back to raw years.
unscale_programming_experience <- function(x, ref = d.completed$work_experience_programming) {
  x * sd(ref) + mean(ref)
}

post_settings <- expand.grid(
  high_debt_version = c("false", "true"),
  session = NA,
  var_names_new_all = 1000,
  work_experience_programming.s = sapply(c(0, 3, 10, 25, 40), scale_programming_experience)
)

post <- posterior_predict(variable_names0.all.exp, newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var= "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
  select(
    estimate,
    high_debt_version,
    work_experience_programming
  )%>%
  mutate(estimate = estimate/1000)

ggplot(post %>% filter(work_experience_programming == 10), aes(x=estimate, fill = high_debt_version)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt", "High debt"),
    values = c("lightblue", "darkblue")
  ) +
  facet_grid(rows = vars(work_experience_programming)) +
  labs(
    title = "Rate of good variable naming",
    x = "Rate",
    y = "Density"
  )

ggplot(post, aes(x=estimate, fill = high_debt_version)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt", "High debt"),
    values = c("lightblue", "darkblue")
  ) +
  facet_grid(rows = vars(work_experience_programming)) +
  labs(
    title = "Rate of good variable naming / professional programming experience",
    x = "Rate",
    y = "Density"
  )

scale_programming_experience <- function(x) {
  (x - mean(d.completed$work_experience_programming))/ sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
  x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}

post_settings <- expand.grid(
  high_debt_version = c("false", "true"),
  session = NA,
  var_names_new_all = 10,
  work_experience_programming.s = sapply(c(10), scale_programming_experience)
)

post <- posterior_predict(variable_names0.all.exp, newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var= "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
  select(
    estimate,
    high_debt_version,
    work_experience_programming
  )

levels(post$high_debt_version) <- c("Low debt version", "High debt version")

ggplot(post, aes(x=estimate, fill = high_debt_version)) +
  geom_bar() +
  facet_grid(rows = vars(high_debt_version)) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt version", "High debt version"),
    values = c("lightblue", "darkblue")
  ) +
  labs(
    title = "Variable naming (10 named variables)",
    x = "Number of good variable names",
    y = "Rate of occurrence"
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0,1,2,3,4,5,6,7,8,9,10), labels = c(0,1,2,3,4,5,6,7,8,9,10)) +
  scale_y_continuous(limits = NULL, breaks = sapply(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7), function(x) x*nrow(post) / 2), labels = c("10%","20%","30%","40%","50%", "60%", "70%")) + theme(legend.position = "hidden")

bad_names.high <- 10 - (post %>% filter(high_debt_version == "High debt version") %>% pull(estimate))
bad_names.low <- 10 - (post %>% filter(high_debt_version == "Low debt version") %>% pull(estimate))
x <- sum(bad_names.high) / sum(bad_names.low)
x
## [1] 5.224778

Considering developers with 10 years of professional programming experience we find that they introduce 422% more non-descriptive variable names in the high debt version.

---
title: "Variable Naming Model"
author: Hampus Broman & William Levén
date: 2021-05
output: 
  html_document: 
    pandoc_args: [ "-o", "docs/variable_naming.html" ]
---

```{r include-setup, include=FALSE}
# Load setup file
# knitr::purl() extracts the R code from setup.Rmd into a temporary script,
# which is then sourced. This presumably defines the shared data sets
# (d.completed, d.both_completed) and helpers (e.g. extendable_model) used
# throughout this document — confirm in setup.Rmd.
source(knitr::purl('setup.Rmd', output = tempfile()))
```

## Looking at the data {.tabset}

There appears to be a significant difference in the rate of bad variable naming between the low and high debt groups.

### New variable names 

```{r plot1}

# Box plot of the good-naming ratio for newly introduced variables,
# split (by fill colour) on the debt treatment.
ggplot(
  d.both_completed,
  aes(x = var_names_new_good.ratio, fill = high_debt_version)
) +
  geom_boxplot() +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(
    name = "Debt level",
    labels = c("High debt", "Low debt"),
    values = c("#7070FF", "lightblue"),
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Distribution of good variable naming rate (new variables)",
    x = "Ratio of good variable name selection"
  )

```

### Copied variable names

```{r plot2}
# Box plot of the good-naming ratio for copied variables,
# split (by fill colour) on the debt treatment.
ggplot(
  d.both_completed,
  aes(x = var_names_copied_good.ratio, fill = high_debt_version)
) +
  geom_boxplot() +
  scale_y_continuous(breaks = NULL) +
  scale_fill_manual(
    name = "Debt level",
    labels = c("High debt", "Low debt"),
    values = c("#7070FF", "lightblue"),
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Distribution of good variable naming rate (copied variables)",
    x = "Ratio of good variable name selection"
  )

```

## Descriptive statistics: {.tabset}

### New variable names

```{r descriptive-statistics-new-vars}
# Five-number summary (plus mean) of the good-naming ratio for new variables.
new_names_ratio <- d.both_completed$var_names_new_good.ratio
summary(new_names_ratio)

# Sample variance of the same ratio, formatted to two decimals.
sprintf("Variance: %.2f", var(new_names_ratio))
```

### Copied variable names

```{r descriptive-statistics-copied-vars}
# Five-number summary (plus mean) of the good-naming ratio for copied variables.
copied_names_ratio <- d.both_completed$var_names_copied_good.ratio
summary(copied_names_ratio)

# Sample variance of the same ratio, formatted to two decimals.
sprintf("Variance: %.2f", var(copied_names_ratio))
```

## Initial model

Variable names are modeled using the binomial family, where the amount of trials is the total amount of new/copied variable names.

We include `high_debt_version` as well as a varying intercept for each individual in our initial model.

### Selecting priors {.tabset}
As the data represents a series of Bernoulli trials we chose a binomial model.

We iterate over the model until we have sane priors that are able to fit the data reasonably well. The prior "lkj(2)" will mean the model is skeptical of strong correlations.

#### Base model with priors

```{r initial-model-definition, class.source = 'fold-show'}
# Factory for the base model and its extensions. variable_names.with("pred")
# presumably fits the base formula extended with `pred` — extendable_model
# is a project helper defined in setup.Rmd; confirm its semantics there.
variable_names.with <- extendable_model(
  base_name = "variable_names",
  # Successes = good new variable names; trials = all new variable names.
  base_formula = "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1  | session)",
  base_priors = c(
    prior(normal(0, 1), class = "b"),          # fixed effects (logit scale)
    prior(normal(2, 1), class = "Intercept"),  # intercept (logit scale)
    prior(exponential(1), class = "sd")        # group-level (session) sd
  ),
  family = binomial(),
  data = d.both_completed,
  base_control = list(adapt_delta = 0.95)
)

```

#### Default priors

```{r default-priors}
# The priors brms would fall back to if we specified none.
prior_summary(variable_names.with(only_priors= TRUE))
```

#### Selected priors

```{r selected-priors}
# Our chosen priors; sample_prior = "only" samples from the prior alone,
# without conditioning on the data.
prior_summary(variable_names.with(sample_prior = "only"))
```

#### Prior predictive check

```{r priors-check, warning=FALSE}
# 200 data sets simulated from the priors, compared against the observed data.
pp_check(variable_names.with(sample_prior = "only"), nsamples = 200)
```

#### Beta parameter influence

We choose a beta parameter priors allowing for the beta parameter to account for 50% of the effect but that is skeptical to such strong effects from the beta parameter.

```{r priors-beta, warning=FALSE}
# Simulate the prior influence of a beta (fixed-effect) draw on the outcome
# scale: predicted success rate with the beta draw, expressed as a
# percentage of the prediction without it.
sim.size <- 1000
# Every brm() fit in this document pins seed = 20210421; seed the ad-hoc
# simulation too so the rendered figure is reproducible.
set.seed(20210421)
sim.intercept <- rnorm(sim.size, 2, 1)  # matches prior(normal(2, 1), class = "Intercept")
sim.beta <- rnorm(sim.size, 0, 1)       # matches prior(normal(0, 1), class = "b")
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100

data.frame(x = sim.beta.diff) %>%
  ggplot(aes(x)) +
  geom_density() +
  xlim(-80, 80) +
  labs(
    title = "Beta parameter prior influence",
    x = "Estimate with beta as % of estimate without beta",
    y = "Density"
  )

```


### Model fit  {.tabset}

We check the posterior distribution and can see that the model seems to have been able to fit the data well.
Sampling also seems to have worked well, as Rhat values are close to 1 and the sampling plots look nice.

#### Posterior predictive check

```{r base-pp-check}
# 200 posterior draws vs. observed counts (bar plot for the discrete outcome).
pp_check(variable_names.with(), nsamples = 200, type = "bars")
```

#### Summary

```{r base-summary}
# Parameter estimates and convergence diagnostics (Rhat, ESS).
summary(variable_names.with())
```

#### Sampling plots

```{r base-plot}
# Trace and density plots for the MCMC chains.
plot(variable_names.with(), ask = FALSE)
```

## Model predictor extenstions {.tabset}

```{r mo-priors}
# Default prior for the monotonic education_level predictor: the simplex of
# level-to-level differences gets a dirichlet(2) prior, mildly regularizing
# towards equal step sizes between adjacent education levels.
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
```

### One variable {.tabset}

```{r model-extension-1, warning=FALSE, class.source = 'fold-show'}
# Extend the base model with each candidate predictor, one at a time, and
# compare every fit against the benchmark with PSIS-LOO cross-validation.
loo_result <- loo(
  # Benchmark model(s)
  variable_names.with(),
  
  # New model(s)
  variable_names.with("work_domain"),
  variable_names.with("work_experience_programming.s"),
  variable_names.with("work_experience_java.s"),
  variable_names.with("education_field"),
  variable_names.with("mo(education_level)", edlvl_prior),  # monotonic ordinal predictor
  variable_names.with("workplace_peer_review"),
  variable_names.with("workplace_td_tracking"),
  variable_names.with("workplace_pair_programming"),
  variable_names.with("workplace_coding_standards"),
  variable_names.with("scenario"),
  variable_names.with("group")
)
```

#### Comparison

```{r model-extension-1-sum, warning=FALSE}
# Element 2 of the loo result: the model comparison table (elpd_diff / se_diff).
loo_result[2]
```

#### Diagnostics

```{r model-extension-1-dig, warning=FALSE}
# Element 1: per-model LOO estimates, including Pareto k diagnostics.
loo_result[1]
```

### Two variables {.tabset}

```{r model-extension-2, warning=FALSE, class.source = 'fold-show'}
# Pairwise combinations of the best-performing single predictors, compared
# against the base model and the single-predictor benchmarks.
loo_result <- loo(
  # Benchmark model(s)
  variable_names.with(),
  variable_names.with("work_experience_programming.s"),
  variable_names.with("education_field"),
  variable_names.with("work_experience_java.s"),
  variable_names.with("workplace_pair_programming"),
  
  # New model(s)
  variable_names.with(c("work_experience_programming.s", "education_field")),
  variable_names.with(c("work_experience_programming.s", "work_experience_java.s")),
  variable_names.with(c("work_experience_programming.s", "workplace_pair_programming")),
  
  variable_names.with(c("education_field", "work_experience_java.s")),
  variable_names.with(c("education_field", "workplace_pair_programming")),
  
  variable_names.with(c("work_experience_java.s", "workplace_pair_programming"))
)
```

#### Comparison

```{r model-extension-2-sum, warning=FALSE}
# Element 2 of the loo result: the model comparison table (elpd_diff / se_diff).
loo_result[2]
```

#### Diagnostics

```{r model-extension-2-dig, warning=FALSE}
# Element 1: per-model LOO estimates, including Pareto k diagnostics.
loo_result[1]
```

### Three variables {.tabset}

```{r model-extension-3, warning=FALSE, class.source= 'fold-show'}
# Three-predictor combinations of the best performers, compared against the
# base model and the stronger one- and two-predictor benchmarks.
loo_result <- loo(
  # Benchmark model(s)
  variable_names.with(),
  
  variable_names.with("work_experience_programming.s"),
  variable_names.with("education_field"),
  variable_names.with("work_experience_java.s"),
  variable_names.with("workplace_pair_programming"),
  
  variable_names.with(c("work_experience_programming.s", "education_field")),
  variable_names.with(c("education_field", "work_experience_java.s")),
  variable_names.with(c("education_field", "workplace_pair_programming")),
  
  # New model(s)
  variable_names.with(c("education_field", "work_experience_programming.s", "work_experience_java.s")),
  variable_names.with(c("education_field", "work_experience_programming.s", "workplace_pair_programming")),
  variable_names.with(c("education_field", "work_experience_java.s", "workplace_pair_programming"))
)


```

#### Comparison

```{r model-extension-3-sum, warning=FALSE}
# Element 2 of the loo result: the model comparison table (elpd_diff / se_diff).
loo_result[2]
```

#### Diagnostics

```{r model-extension-3-dig, warning=FALSE}
# Element 1: per-model LOO estimates, including Pareto k diagnostics.
loo_result[1]
```

## Candidate models  {.tabset}
We pick some of our top performing models as candidates and inspect them closer.

The candidate models are named and listed in order of complexity.

### variable_names0  {.tabset}
We select the simplest model as a baseline.
  
```{r variable_names0, class.source = 'fold-show'}
# Baseline candidate: binomial model of good new variable names out of all
# new variable names, with debt version as fixed effect and a varying
# intercept per session.
variable_names0 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)",
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  control = list(adapt_delta = 0.95),  # raised adapt_delta to reduce divergences
  file = "fits/variable_names0",       # cache the fit on disk
  file_refit = "on_change",            # refit only when formula/data/priors change
  seed = 20210421
)

```

#### Summary

```{r variable_names0-sum}
# Population-level estimates and convergence diagnostics (Rhat, ESS).
summary(variable_names0)
```

#### Random effects

```{r variable_names0-raneff}
# Per-session intercept deviations.
ranef(variable_names0)
```

#### Sampling plots

```{r variable_names0-plot}
# Trace and density plots for the MCMC chains.
plot(variable_names0, ask = FALSE)
```

#### Posterior predictive check

```{r variable_names0-pp}
# 200 posterior draws vs. observed counts (bar plot for the discrete outcome).
pp_check(variable_names0, nsamples = 200, type = "bars") 
```

### variable_names1  {.tabset}
We select the best performing model with one variable.
  
```{r variable_names1, class.source = 'fold-show'}
# Best one-predictor candidate: baseline plus scaled professional
# programming experience.
variable_names1 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  control = list(adapt_delta = 0.95),  # raised adapt_delta to reduce divergences
  file = "fits/variable_names1",       # cache the fit on disk
  file_refit = "on_change",            # refit only when formula/data/priors change
  seed = 20210421
)

```

#### Summary

```{r variable_names1-sum}
# Population-level estimates and convergence diagnostics (Rhat, ESS).
summary(variable_names1)
```

#### Random effects

```{r variable_names1-raneff}
# Per-session intercept deviations.
ranef(variable_names1)
```

#### Sampling plots

```{r variable_names1-plot}
# Trace and density plots for the MCMC chains.
plot(variable_names1, ask = FALSE)
```

#### Posterior predictive check

```{r variable_names1-pp}
# 200 posterior draws vs. observed counts (bar plot for the discrete outcome).
pp_check(variable_names1, nsamples = 200, type = "bars")
```

### variable_names2  {.tabset}
We select the second best performing model with one variable.
  
```{r variable_names2, class.source = 'fold-show'}
# Second-best one-predictor candidate: baseline plus education field.
variable_names2 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + education_field + (1 | session)",
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  control = list(adapt_delta = 0.95),  # raised adapt_delta to reduce divergences
  file = "fits/variable_names2",       # cache the fit on disk
  file_refit = "on_change",            # refit only when formula/data/priors change
  seed = 20210421
)

```
#### Summary

```{r variable_names2-sum}
# Population-level estimates and convergence diagnostics (Rhat, ESS).
summary(variable_names2)
```

#### Random effects

```{r variable_names2-raneff}
# Per-session intercept deviations.
ranef(variable_names2)
```

#### Sampling plots

```{r variable_names2-plot}
# Trace and density plots for the MCMC chains.
plot(variable_names2, ask = FALSE)
```

#### Posterior predictive check

```{r variable_names2-pp}
# 200 posterior draws vs. observed counts (bar plot for the discrete outcome).
pp_check(variable_names2, nsamples = 200, type = "bars")
```

### variable_names3  {.tabset}
We select the best performing model with three variables.
  
```{r variable_names3, class.source = 'fold-show'}
# Best three-predictor candidate: baseline plus programming experience,
# education field and workplace pair programming.
variable_names3 <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + education_field + workplace_pair_programming + (1 | session)",
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = d.both_completed,
  control = list(adapt_delta = 0.95),  # raised adapt_delta to reduce divergences
  file = "fits/variable_names3",       # cache the fit on disk
  file_refit = "on_change",            # refit only when formula/data/priors change
  seed = 20210421
)
```

#### Summary

```{r variable_names3-sum}
# Population-level estimates and convergence diagnostics (Rhat, ESS).
summary(variable_names3)
```

#### Random effects

```{r variable_names3-raneff}
# Per-session intercept deviations.
ranef(variable_names3)
```

#### Sampling plots

```{r variable_names3-plot}
# Trace and density plots for the MCMC chains.
plot(variable_names3, ask = FALSE)
```

#### Posterior predictive check

```{r variable_names3-pp}
# 200 posterior draws vs. observed counts (bar plot for the discrete outcome).
pp_check(variable_names3, nsamples = 200, type = "bars")
```


## Final model 
All candidate models look nice; none is significantly better than the others, so we will proceed with the simplest model: `variable_names0`

### Variations {.tabset}
We will try a few different variations of the selected candidate model.

#### All data points {.tabset}

Some participants only completed one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can and will therefore try to fit the model with the complete dataset.

```{r variable_names0.all, class.source = 'fold-show'}
# Same formula and priors as variable_names0, but fitted to d.completed,
# i.e. including participants who only finished one scenario.
variable_names0.all <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + (1 | session)",
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = as.data.frame(d.completed),
  control = list(adapt_delta = 0.95),  # raised adapt_delta to reduce divergences
  file = "fits/variable_names0.all",   # cache the fit on disk
  file_refit = "on_change",            # refit only when formula/data/priors change
  seed = 20210421                      # was mis-indented; aligned with sibling fits
)
```

##### Summary

```{r variation.all-sum}
# Population-level estimates and convergence diagnostics (Rhat, ESS).
summary(variable_names0.all)
```

##### Random effects

```{r variation.all-raneff}
# Per-session intercept deviations.
ranef(variable_names0.all)
```

##### Sampling plots

```{r variation.all-plot}
# Trace and density plots for the MCMC chains.
plot(variable_names0.all, ask = FALSE)
```

##### Posterior predictive check

```{r variation.all-pp}
# 200 posterior draws vs. observed counts (bar plot for the discrete outcome).
pp_check(variable_names0.all, nsamples = 200, type = "bars")
```

#### With experience predictor {.tabset}

As including all data points didn't harm the model we will create this variant with all data points as well.

This variation includes the `work_experience_programming.s` predictor as it can give further insight into how experience plays a factor in the effect we try to measure. This is especially important as our sampling skewed towards containing less experienced developers than the population at large.

```{r variable_names0.all.exp, class.source = 'fold-show'}
# Final-candidate variant: all data points (d.completed) plus the scaled
# professional programming experience predictor.
variable_names0.all.exp <- brm(
  "var_names_new_good | trials(var_names_new_all) ~ 1 + high_debt_version + work_experience_programming.s + (1 | session)",
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(2, 1), class = "Intercept"),
    prior(exponential(1), class = "sd")
  ),
  family = binomial(),
  data = as.data.frame(d.completed),
  control = list(adapt_delta = 0.95),     # raised adapt_delta to reduce divergences
  file = "fits/variable_names0.all.exp",  # cache the fit on disk
  file_refit = "on_change",               # refit only when formula/data/priors change
  seed = 20210421
)
```

##### Summary

```{r variation.all.exp-sum}
# Population-level estimates and convergence diagnostics (Rhat, ESS).
summary(variable_names0.all.exp)
```

##### Random effects

```{r variation.all.exp-raneff}
# Per-session intercept deviations.
ranef(variable_names0.all.exp)
```

##### Sampling plots

```{r variation.all.exp-plot}
# Trace and density plots for the MCMC chains.
plot(variable_names0.all.exp, ask = FALSE)
```

##### Posterior predictive check

```{r variation.all.exp-pp}
# 200 posterior draws vs. observed counts (bar plot for the discrete outcome).
pp_check(variable_names0.all.exp, nsamples = 200, type = "bars")
```

##### Loo comparison

```{r variation.all.exp-loo, warning=FALSE}
# PSIS-LOO comparison of the all-data model with and without the
# experience predictor.
loo(
  variable_names0.all,
  variable_names0.all.exp
)
```

### Final model
* Fitting the model to all data points did not significantly damage the model, and the full data set will be used as it is a more fair representation of reality.
* Adding the experience predictors did not significantly damage the model, and they will be used as they provide useful insight.

This means that our final model, with all data points and experience predictors, is `variable_names0.all.exp`

## Interpreting the model
To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected, we will mainly analyze the $\beta$ parameters.

### $\beta$ parameters
```{r interpret-beta-plot, warning=FALSE, message=FALSE}
# Posterior densities of the beta parameters, with 95% of the density shaded.
# Note: a bare scale_y_discrete() was previously added before the labelled
# one, causing ggplot2 to discard and replace the scale (with a message);
# only the labelled scale is needed.
mcmc_areas(
  variable_names0.all.exp, 
  pars = c("b_high_debt_versionfalse", "b_work_experience_programming.s"), 
  prob = 0.95
) + 
  scale_y_discrete(labels = c("High debt version: false", "Professional programming experience")) +
  ggtitle(
    "Beta parameters densities in variable naming model", 
    subtitle = "Shaded region marks 95% of the density. Line marks the median"
  )
```

### Effects sizes
We start by extracting posterior samples

```{r effect-size-1}
# Helpers to move between raw years of experience and the standardized scale
# the model was fitted on (mean/sd taken from d.completed).
scale_programming_experience <- function(x) {
  (x - mean(d.completed$work_experience_programming)) / sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
  x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}

# Prediction grid: both debt versions at 0/3/10/25/40 years of experience.
# session = NA so predictions are not tied to a specific participant
# (relies on brms' handling of new/NA grouping levels — confirm);
# var_names_new_all = 1000 trials lets us read the estimate as a rate.
post_settings <- expand.grid(
  high_debt_version = c("false", "true"),
  session = NA,
  var_names_new_all = 1000,
  work_experience_programming.s = sapply(c(0, 3, 10, 25, 40), scale_programming_experience)
)

post <- posterior_predict(variable_names0.all.exp, newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var = "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
  select(
    estimate,
    high_debt_version,
    work_experience_programming
  ) %>%
  mutate(estimate = estimate / 1000)  # counts out of 1000 trials -> rate

# near() instead of `== 10`: the scale/unscale round trip goes through
# floating point and may not reproduce exactly 10.
ggplot(post %>% filter(near(work_experience_programming, 10)), aes(x = estimate, fill = high_debt_version)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt", "High debt"),
    values = c("lightblue", "darkblue")
  ) +
  facet_grid(rows = vars(work_experience_programming)) +
  labs(
    title = "Rate of good variable naming",
    x = "Rate",
    y = "Density"
  )

```

```{r effect-size-2}
# Density of the predicted good-naming rate per experience level (facets)
# and debt version (fill). Reuses `post` from the previous chunk.
# Fixed a typo in the user-facing title ("programmign" -> "programming").
ggplot(post, aes(x = estimate, fill = high_debt_version)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt", "High debt"),
    values = c("lightblue", "darkblue")
  ) +
  facet_grid(rows = vars(work_experience_programming)) +
  labs(
    title = "Rate of good variable naming / professional programming experience",
    x = "Rate",
    y = "Density"
  )

```

```{r effect-size-3}
# Reuses scale_programming_experience() / unscale_programming_experience()
# defined in the first effect-size chunk; redefining them here (identically)
# was redundant and has been removed.

# Prediction grid: 10 named variables for a developer with 10 years of
# professional programming experience, for both debt versions.
post_settings <- expand.grid(
  high_debt_version = c("false", "true"),
  session = NA,
  var_names_new_all = 10,
  work_experience_programming.s = sapply(c(10), scale_programming_experience)
)

post <- posterior_predict(variable_names0.all.exp, newdata = post_settings) %>%
  melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
  left_join(
    rowid_to_column(post_settings, var = "settings_id"),
    by = "settings_id"
  ) %>%
  mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
  select(
    estimate,
    high_debt_version,
    work_experience_programming
  )

# Relabel the factor levels for nicer facet strips and legend text.
levels(post$high_debt_version) <- c("Low debt version", "High debt version")

ggplot(post, aes(x = estimate, fill = high_debt_version)) +
  geom_bar() +
  facet_grid(rows = vars(high_debt_version)) +
  scale_fill_manual(
    name = "Debt version",
    labels = c("Low debt version", "High debt version"),
    values = c("lightblue", "darkblue")
  ) +
  labs(
    title = "Variable naming (10 named variables)",
    x = "Number of good variable names",
    y = "Rate of occurrence"
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0,1,2,3,4,5,6,7,8,9,10), labels = c(0,1,2,3,4,5,6,7,8,9,10)) +
  # Breaks are fractions of the draws per facet (nrow(post) / 2, since `post`
  # contains draws for both debt versions), labelled as percentages.
  scale_y_continuous(
    limits = NULL,
    breaks = sapply(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7), function(x) x * nrow(post) / 2),
    labels = c("10%", "20%", "30%", "40%", "50%", "60%", "70%")
  ) +
  # "hidden" is not a documented legend.position value; "none" is the
  # supported way to suppress the legend.
  theme(legend.position = "none")

```

```{r es-10, class.source = 'fold-show'}
# Total number of *bad* names (out of 10 named variables) summed over all
# posterior draws, per debt version; their ratio is the relative increase
# in bad naming under high debt. `x` is reused by the inline text below.
good_high <- post %>%
  filter(high_debt_version == "High debt version") %>%
  pull(estimate)
good_low <- post %>%
  filter(high_debt_version == "Low debt version") %>%
  pull(estimate)
x <- sum(10 - good_high) / sum(10 - good_low)
x
```

Considering developers with 10 years of professional programming experience we find that they introduce `r scales::label_percent()(x - 1)` more non-descriptive variable names in the high debt version.
