We plot the data and can see that there is no obvious large difference between the debt versions. We do, however, see a difference with respect to which scenario the dropout occurred on.
# Bar chart of the least-completed task per session.
d.sessions %>%
  ggplot(aes(x = task_completion, fill = task_completion)) +
  geom_bar() +
  labs(
    title = "Task completion per session",
    x = "Least completed task per session",
    y = "Number of sessions"
  ) +
  # Single uniform bar color; legend suppressed.
  scale_fill_manual(values = rep("#7070FF", 4), guide = NULL)
# Bar chart of dropouts (sessions that never submitted) per debt level.
d %>%
  # Recode the boolean debt flag to human-readable labels
  # (mutate() + case_when() replaces superseded mutate_at()).
  mutate(high_debt_version = case_when(
    high_debt_version == "false" ~ "Low debt",
    high_debt_version == "true" ~ "High debt"
  )) %>%
  filter(task_completion == "Not submitted") %>%
  ggplot(aes(x = high_debt_version, fill = high_debt_version)) +
  geom_bar() +
  labs(title = "Dropouts per debt level") + # fixed typo "Droputs"
  xlab("Debt level") +
  ylab("Number of dropouts") +
  scale_fill_manual(values = rep("#7070FF", 2), guide = NULL) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8))
# Bar chart of dropouts per scenario.
d %>%
  filter(task_completion == "Not submitted") %>%
  ggplot(aes(scenario, fill = scenario)) +
  geom_bar() +
  labs(title = "Dropouts per scenario") + # fixed typos "Dropputs"/"scenraio"
  ylab("Number of dropouts") +
  xlab("Scenario") +
  scale_fill_manual(values = rep("#7070FF", 2), guide = NULL) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10))
# Stacked (proportional) bar chart of task completion per signup group.
d.sessions %>%
  # Abbreviate long group names for readable axis labels
  # (mutate() + case_when() replaces superseded mutate_at()).
  mutate(group = case_when(
    group == "code-interested" ~ "code-int.",
    group == "product-company" ~ "prod.-comp.",
    group == "professional-contact" ~ "prof.-cont.",
    TRUE ~ as.character(group)
  )) %>%
  # position is a geom argument, not an aesthetic; it was incorrectly
  # placed inside aes() and is now supplied to geom_bar() only.
  ggplot(aes(group, fill = task_completion)) +
  geom_bar(position = "fill") +
  labs(title = "Dropouts per signup group") + # fixed typo "Dropputs"
  ylab("Ratio") +
  xlab("Signup Group") +
  scale_fill_manual("Task completion", values = c("transparent", "lightblue", "#7070FF", "darkblue"))
The outcome model represents incremental steps and will therefore be modeled with a stopping ratio. Other incremental models were considered, but stopping ratio seems to best fit both the data and the underlying data generation process.
We include high_debt_version
as a predictor in our model as this variable represents the very effect we want to measure.
We iterate over the model until we have sane priors.
# Base (extendable) stopping-ratio model: task_completion predicted by
# the debt version only. Further predictors are added via
# dropouts.with("predictor", ...) in the loo comparisons below.
dropouts.with <- extendable_model(
  base_name = "dropouts",
  base_formula = "task_completion ~ 1 + high_debt_version",
  base_priors = c(
    prior(normal(0, 0.4), class = "b"),       # skeptical slope prior
    prior(normal(-2, 1), class = "Intercept") # dropouts are rare a priori
  ),
  family = sratio(), # stopping ratio: incremental steps toward completion
  data = d # trailing comma removed: an empty trailing argument errors in R calls
)
# Inspect the priors assigned to the base model (no sampling).
prior_summary(dropouts.with(only_priors= TRUE))
# Priors as used when sampling from the prior only.
prior_summary(dropouts.with(sample_prior = "only"))
# Prior predictive check: outcome counts simulated from the priors alone.
pp_check(dropouts.with(sample_prior = "only"), nsamples = 200, type = "bars")
We choose a beta parameter prior that allows the beta parameter to account for 75% of the effect, while remaining skeptical of such strong effects from the beta parameter.
# Simulate the beta prior's influence: for each prior draw, compare the
# predicted probability with and without the beta term, expressed as a
# percentage change relative to the intercept-only prediction.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, -2, 1)
sim.beta <- rnorm(sim.size, 0, 0.4)
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100
# Density of the simulated percentage differences.
ggplot(data.frame(x = sim.beta.diff), aes(x)) +
  geom_density() +
  xlim(-80, 80) +
  labs(
    title = "Beta parameter prior influence",
    x = "Estimate with beta as % of estimate without beta",
    y = "Density"
  )
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
# Posterior predictive check against the observed outcome counts.
pp_check(dropouts.with(), nsamples = 200, type = "bars")
# Model summary; Rhat close to 1 and large ESS indicate good sampling.
summary(dropouts.with())
## Family: sratio
## Links: mu = logit; disc = identity
## Formula: task_completion ~ 1 + high_debt_version
## Data: as.data.frame(data) (Number of observations: 73)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.19 0.31 -1.80 -0.61 1.00 4336
## Intercept[2] -2.92 0.56 -4.07 -1.93 1.00 4361
## Intercept[3] -2.63 0.52 -3.74 -1.68 1.00 4463
## high_debt_versionfalse 0.17 0.30 -0.42 0.76 1.00 4403
## Tail_ESS
## Intercept[1] 2945
## Intercept[2] 2947
## Intercept[3] 2637
## high_debt_versionfalse 2674
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for all model parameters.
plot(dropouts.with(), ask = FALSE)
# default prior for monotonic predictor
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
We use loo
to check some possible extensions on the model.
# Round 1: compare the base model against every single-predictor
# extension using PSIS-LOO cross-validation.
loo_result <- loo(
# Benchmark model(s)
dropouts.with(),
# New model(s)
dropouts.with("work_domain"),
dropouts.with("work_experience_programming.s"),
dropouts.with("work_experience_java.s"),
dropouts.with("education_field"),
dropouts.with("mo(education_level)", edlvl_prior),
dropouts.with("workplace_peer_review"),
dropouts.with("workplace_td_tracking"),
dropouts.with("workplace_pair_programming"),
dropouts.with("workplace_coding_standards"),
dropouts.with("scenario"),
dropouts.with("group")
)
# elpd differences relative to the best model (first row).
loo_result[2]
## $diffs
## elpd_diff se_diff
## dropouts.with("education_field") 0.0 0.0
## dropouts.with("workplace_pair_programming") -0.3 1.7
## dropouts.with("scenario") -0.7 1.6
## dropouts.with("group") -1.1 1.4
## dropouts.with() -1.3 1.2
## dropouts.with("work_experience_programming.s") -1.3 1.4
## dropouts.with("work_experience_java.s") -1.3 1.4
## dropouts.with("workplace_peer_review") -1.5 1.3
## dropouts.with("workplace_coding_standards") -1.6 1.3
## dropouts.with("workplace_td_tracking") -1.6 1.2
## dropouts.with("mo(education_level)", edlvl_prior) -1.7 1.2
## dropouts.with("work_domain") -1.7 1.4
# Full LOO details (elpd_loo, p_loo, Pareto k diagnostics) per model.
loo_result[1]
## $loos
## $loos$`dropouts.with()`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.5 7.1
## p_loo 2.7 0.5
## looic 125.1 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("work_domain")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.0 7.1
## p_loo 4.5 0.6
## looic 126.0 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("work_experience_programming.s")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.6 7.0
## p_loo 3.1 0.5
## looic 125.1 14.1
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("work_experience_java.s")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.6 7.1
## p_loo 3.1 0.6
## looic 125.2 14.1
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("education_field")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.3 7.0
## p_loo 3.4 0.6
## looic 122.5 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.0 7.1
## p_loo 3.3 0.5
## looic 125.9 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("workplace_peer_review")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.8 7.1
## p_loo 3.1 0.6
## looic 125.5 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("workplace_td_tracking")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.9 7.1
## p_loo 3.0 0.5
## looic 125.8 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("workplace_pair_programming")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.6 7.0
## p_loo 3.1 0.5
## looic 123.1 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("workplace_coding_standards")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.8 7.1
## p_loo 3.0 0.5
## looic 125.7 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("scenario")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.0 7.0
## p_loo 3.1 0.5
## looic 124.0 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("group")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.3 7.2
## p_loo 4.0 0.6
## looic 124.6 14.3
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
# Round 2: test two-predictor combinations of the best single predictors.
loo_result <- loo(
# Benchmark model(s)
dropouts.with(),
dropouts.with("education_field"),
dropouts.with("workplace_pair_programming"),
dropouts.with("scenario"),
dropouts.with("group"),
# New model(s)
dropouts.with(c("education_field", "workplace_pair_programming")),
dropouts.with(c("education_field", "scenario")),
dropouts.with(c("education_field", "group")),
dropouts.with(c("workplace_pair_programming", "scenario")),
dropouts.with(c("workplace_pair_programming", "group")),
dropouts.with(c("scenario", "group"))
)
# elpd differences relative to the best model (first row).
loo_result[2]
## $diffs
## elpd_diff
## dropouts.with(c("education_field", "workplace_pair_programming")) 0.0
## dropouts.with(c("education_field", "scenario")) -0.6
## dropouts.with(c("workplace_pair_programming", "scenario")) -1.0
## dropouts.with(c("education_field", "group")) -1.0
## dropouts.with("education_field") -1.3
## dropouts.with("workplace_pair_programming") -1.6
## dropouts.with(c("workplace_pair_programming", "group")) -1.6
## dropouts.with(c("scenario", "group")) -1.8
## dropouts.with("scenario") -2.0
## dropouts.with("group") -2.3
## dropouts.with() -2.5
## se_diff
## dropouts.with(c("education_field", "workplace_pair_programming")) 0.0
## dropouts.with(c("education_field", "scenario")) 1.1
## dropouts.with(c("workplace_pair_programming", "scenario")) 1.6
## dropouts.with(c("education_field", "group")) 0.9
## dropouts.with("education_field") 0.9
## dropouts.with("workplace_pair_programming") 1.3
## dropouts.with(c("workplace_pair_programming", "group")) 1.4
## dropouts.with(c("scenario", "group")) 1.6
## dropouts.with("scenario") 1.6
## dropouts.with("group") 1.4
## dropouts.with() 1.3
# Full LOO details (elpd_loo, p_loo, Pareto k diagnostics) per model.
loo_result[1]
## $loos
## $loos$`dropouts.with()`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.5 7.1
## p_loo 2.7 0.5
## looic 125.1 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("education_field")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.3 7.0
## p_loo 3.4 0.6
## looic 122.5 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("workplace_pair_programming")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.6 7.0
## p_loo 3.1 0.5
## looic 123.1 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("scenario")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.0 7.0
## p_loo 3.1 0.5
## looic 124.0 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("group")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.3 7.2
## p_loo 4.0 0.6
## looic 124.6 14.3
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.0 6.9
## p_loo 3.8 0.6
## looic 120.0 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.7 6.9
## p_loo 3.8 0.6
## looic 121.3 13.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "group"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 7.1
## p_loo 4.9 0.7
## looic 122.1 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("workplace_pair_programming", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 6.9
## p_loo 3.5 0.5
## looic 122.0 13.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("workplace_pair_programming", "group"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.6 7.1
## p_loo 4.4 0.6
## looic 123.2 14.3
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("scenario", "group"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.8 7.1
## p_loo 4.5 0.6
## looic 123.7 14.1
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
# Round 3: test three-predictor combinations of the best candidates.
loo_result <- loo(
# Benchmark model(s)
dropouts.with(),
dropouts.with("education_field"),
dropouts.with("workplace_pair_programming"),
dropouts.with("scenario"),
dropouts.with("group"),
dropouts.with(c("education_field", "workplace_pair_programming")),
dropouts.with(c("education_field", "scenario")),
dropouts.with(c("workplace_pair_programming", "scenario")),
dropouts.with(c("education_field", "group")),
# New model(s)
dropouts.with(c("education_field", "workplace_pair_programming", "scenario")),
dropouts.with(c("group", "workplace_pair_programming", "scenario")),
dropouts.with(c("education_field", "group", "scenario")),
dropouts.with(c("education_field", "workplace_pair_programming", "group"))
)
# elpd differences relative to the best model (first row).
loo_result[2]
## $diffs
## elpd_diff
## dropouts.with(c("education_field", "workplace_pair_programming", "scenario")) 0.0
## dropouts.with(c("education_field", "workplace_pair_programming", "group")) -0.6
## dropouts.with(c("education_field", "workplace_pair_programming")) -0.6
## dropouts.with(c("education_field", "group", "scenario")) -0.8
## dropouts.with(c("education_field", "scenario")) -1.2
## dropouts.with(c("group", "workplace_pair_programming", "scenario")) -1.5
## dropouts.with(c("workplace_pair_programming", "scenario")) -1.6
## dropouts.with(c("education_field", "group")) -1.6
## dropouts.with("education_field") -1.9
## dropouts.with("workplace_pair_programming") -2.2
## dropouts.with("scenario") -2.6
## dropouts.with("group") -2.9
## dropouts.with() -3.1
## se_diff
## dropouts.with(c("education_field", "workplace_pair_programming", "scenario")) 0.0
## dropouts.with(c("education_field", "workplace_pair_programming", "group")) 1.0
## dropouts.with(c("education_field", "workplace_pair_programming")) 0.7
## dropouts.with(c("education_field", "group", "scenario")) 0.9
## dropouts.with(c("education_field", "scenario")) 0.9
## dropouts.with(c("group", "workplace_pair_programming", "scenario")) 1.4
## dropouts.with(c("workplace_pair_programming", "scenario")) 1.3
## dropouts.with(c("education_field", "group")) 1.3
## dropouts.with("education_field") 1.2
## dropouts.with("workplace_pair_programming") 1.3
## dropouts.with("scenario") 1.3
## dropouts.with("group") 1.5
## dropouts.with() 1.5
# Full LOO details (elpd_loo, p_loo, Pareto k diagnostics) per model.
loo_result[1]
## $loos
## $loos$`dropouts.with()`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.5 7.1
## p_loo 2.7 0.5
## looic 125.1 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("education_field")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.3 7.0
## p_loo 3.4 0.6
## looic 122.5 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("workplace_pair_programming")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.6 7.0
## p_loo 3.1 0.5
## looic 123.1 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("scenario")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.0 7.0
## p_loo 3.1 0.5
## looic 124.0 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("group")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.3 7.2
## p_loo 4.0 0.6
## looic 124.6 14.3
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.0 6.9
## p_loo 3.8 0.6
## looic 120.0 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.7 6.9
## p_loo 3.8 0.6
## looic 121.3 13.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("workplace_pair_programming", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 6.9
## p_loo 3.5 0.5
## looic 122.0 13.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "group"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 7.1
## p_loo 4.9 0.7
## looic 122.1 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.4 6.7
## p_loo 4.2 0.6
## looic 118.8 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("group", "workplace_pair_programming", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.9 7.0
## p_loo 4.7 0.7
## looic 121.8 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "group", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.2 6.9
## p_loo 5.1 0.7
## looic 120.5 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 71 97.3% 5152
## (0.5, 0.7] (ok) 2 2.7% 5504
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming", "group"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.0 7.0
## p_loo 5.2 0.7
## looic 119.9 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
# Round 4: test the full four-predictor model against all prior candidates.
loo_result <- loo(
# Benchmark model(s)
dropouts.with(),
dropouts.with("education_field"),
dropouts.with("workplace_pair_programming"),
dropouts.with("scenario"),
dropouts.with("group"),
dropouts.with(c("education_field", "workplace_pair_programming")),
dropouts.with(c("education_field", "scenario")),
dropouts.with(c("workplace_pair_programming", "scenario")),
dropouts.with(c("education_field", "group")),
dropouts.with(c("education_field", "workplace_pair_programming", "scenario")),
dropouts.with(c("education_field", "workplace_pair_programming", "group")),
# New model(s)
dropouts.with(c("education_field", "workplace_pair_programming", "group", "scenario"))
)
# elpd differences relative to the best model (first row).
loo_result[2]
## $diffs
## elpd_diff
## dropouts.with(c("education_field", "workplace_pair_programming", "group", "scenario")) 0.0
## dropouts.with(c("education_field", "workplace_pair_programming", "scenario")) -0.2
## dropouts.with(c("education_field", "workplace_pair_programming", "group")) -0.7
## dropouts.with(c("education_field", "workplace_pair_programming")) -0.8
## dropouts.with(c("education_field", "scenario")) -1.4
## dropouts.with(c("workplace_pair_programming", "scenario")) -1.7
## dropouts.with(c("education_field", "group")) -1.8
## dropouts.with("education_field") -2.0
## dropouts.with("workplace_pair_programming") -2.3
## dropouts.with("scenario") -2.8
## dropouts.with("group") -3.1
## dropouts.with() -3.3
## se_diff
## dropouts.with(c("education_field", "workplace_pair_programming", "group", "scenario")) 0.0
## dropouts.with(c("education_field", "workplace_pair_programming", "scenario")) 0.6
## dropouts.with(c("education_field", "workplace_pair_programming", "group")) 0.8
## dropouts.with(c("education_field", "workplace_pair_programming")) 0.9
## dropouts.with(c("education_field", "scenario")) 1.2
## dropouts.with(c("workplace_pair_programming", "scenario")) 1.4
## dropouts.with(c("education_field", "group")) 1.2
## dropouts.with("education_field") 1.4
## dropouts.with("workplace_pair_programming") 1.4
## dropouts.with("scenario") 1.5
## dropouts.with("group") 1.4
## dropouts.with() 1.6
# Full LOO details (elpd_loo, p_loo, Pareto k diagnostics) per model.
loo_result[1]
## $loos
## $loos$`dropouts.with()`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.5 7.1
## p_loo 2.7 0.5
## looic 125.1 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("education_field")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.3 7.0
## p_loo 3.4 0.6
## looic 122.5 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("workplace_pair_programming")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.6 7.0
## p_loo 3.1 0.5
## looic 123.1 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("scenario")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.0 7.0
## p_loo 3.1 0.5
## looic 124.0 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with("group")`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.3 7.2
## p_loo 4.0 0.6
## looic 124.6 14.3
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.0 6.9
## p_loo 3.8 0.6
## looic 120.0 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.7 6.9
## p_loo 3.8 0.6
## looic 121.3 13.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("workplace_pair_programming", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 6.9
## p_loo 3.5 0.5
## looic 122.0 13.8
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "group"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 7.1
## p_loo 4.9 0.7
## looic 122.1 14.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.4 6.7
## p_loo 4.2 0.6
## looic 118.8 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming", "group"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.0 7.0
## p_loo 5.2 0.7
## looic 119.9 14.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`dropouts.with(c("education_field", "workplace_pair_programming", "group", "scenario"))`
##
## Computed from 4000 by 73 log-likelihood matrix
##
## Estimate SE
## elpd_loo -59.2 6.8
## p_loo 5.5 0.7
## looic 118.5 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
# Candidate 0 (baseline): debt version only, same as dropouts.with().
# NOTE(review): object and file names spell "dropouts" as "droputs";
# the name is referenced below (summary/plot/pp_check) so it is kept.
droputs0 <- brm(
"task_completion ~ 1 + high_debt_version",
prior = c(
prior(normal(0, 0.4), class = "b"),
prior(normal(-2, 1), class = "Intercept")
),
family = sratio(),
data = as.data.frame(d),
file = "fits/droputs0", # cache fitted model to disk
file_refit = "on_change",
seed = 20210421 # fixed seed for reproducibility
)
# Posterior summary of the baseline candidate model.
summary(droputs0)
## Family: sratio
## Links: mu = logit; disc = identity
## Formula: task_completion ~ 1 + high_debt_version
## Data: as.data.frame(d) (Number of observations: 73)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -1.19 0.31 -1.80 -0.61 1.00 4336
## Intercept[2] -2.92 0.56 -4.07 -1.93 1.00 4361
## Intercept[3] -2.63 0.52 -3.74 -1.68 1.00 4463
## high_debt_versionfalse 0.17 0.30 -0.42 0.76 1.00 4403
## Tail_ESS
## Intercept[1] 2945
## Intercept[2] 2947
## Intercept[3] 2637
## high_debt_versionfalse 2674
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace/density plots and posterior predictive check for the baseline model.
plot(droputs0, ask = FALSE)
pp_check(droputs0, nsamples = 200, type = "bars")
We select the best performing model with one variable.
# Candidate 1: best one-extra-variable model (adds education_field).
droputs1 <- brm(
"task_completion ~ 1 + high_debt_version + education_field",
prior = c(
prior(normal(0, 0.4), class = "b"),
prior(normal(-2, 1), class = "Intercept")
),
family = sratio(),
data = as.data.frame(d),
file = "fits/droputs1", # cache fitted model to disk
file_refit = "on_change",
seed = 20210421 # fixed seed for reproducibility
)
# Posterior summary of the one-variable candidate model.
summary(droputs1)
## Family: sratio
## Links: mu = logit; disc = identity
## Formula: task_completion ~ 1 + high_debt_version + education_field
## Data: as.data.frame(d) (Number of observations: 73)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -1.17 0.38 -1.92 -0.43 1.00
## Intercept[2] -2.88 0.60 -4.10 -1.76 1.00
## Intercept[3] -2.59 0.55 -3.69 -1.55 1.00
## high_debt_versionfalse 0.17 0.30 -0.42 0.75 1.00
## education_fieldComputerScience -0.29 0.32 -0.92 0.34 1.00
## education_fieldElectricalEngineering -0.05 0.38 -0.79 0.71 1.00
## education_fieldIndustrialengineering -0.11 0.39 -0.88 0.66 1.00
## education_fieldInteractionDesign 0.11 0.39 -0.63 0.87 1.00
## education_fieldNone 0.10 0.38 -0.65 0.86 1.00
## education_fieldSoftwareEngineering 0.32 0.32 -0.29 0.96 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 6872 3180
## Intercept[2] 7429 2866
## Intercept[3] 6958 3168
## high_debt_versionfalse 7207 2631
## education_fieldComputerScience 7522 2931
## education_fieldElectricalEngineering 6709 2691
## education_fieldIndustrialengineering 7454 3191
## education_fieldInteractionDesign 7919 2752
## education_fieldNone 8046 3139
## education_fieldSoftwareEngineering 6906 2602
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace/density plots and posterior predictive check for candidate 1.
plot(droputs1, ask = FALSE)
pp_check(droputs1, nsamples = 200, type = "bars")
We select the best performing model with two variables.
# Candidate 2: best two-extra-variable model
# (adds education_field and workplace_pair_programming).
droputs2 <- brm(
"task_completion ~ 1 + high_debt_version + education_field + workplace_pair_programming",
prior = c(
prior(normal(0, 0.4), class = "b"),
prior(normal(-2, 1), class = "Intercept")
),
family = sratio(),
data = as.data.frame(d),
file = "fits/droputs2", # cache fitted model to disk
file_refit = "on_change",
seed = 20210421 # fixed seed for reproducibility
)
summary(droputs2)
## Family: sratio
## Links: mu = logit; disc = identity
## Formula: task_completion ~ 1 + high_debt_version + education_field + workplace_pair_programming
## Data: as.data.frame(d) (Number of observations: 73)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -0.93 0.43 -1.78 -0.11 1.00
## Intercept[2] -2.63 0.62 -3.95 -1.49 1.00
## Intercept[3] -2.32 0.58 -3.53 -1.23 1.00
## high_debt_versionfalse 0.15 0.29 -0.41 0.73 1.00
## education_fieldComputerScience -0.30 0.31 -0.90 0.32 1.00
## education_fieldElectricalEngineering -0.06 0.38 -0.80 0.69 1.00
## education_fieldIndustrialengineering -0.12 0.40 -0.90 0.66 1.00
## education_fieldInteractionDesign 0.09 0.38 -0.66 0.83 1.00
## education_fieldNone 0.09 0.40 -0.70 0.87 1.00
## education_fieldSoftwareEngineering 0.34 0.33 -0.30 0.98 1.00
## workplace_pair_programmingfalse 0.44 0.31 -0.16 1.07 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 8040 2832
## Intercept[2] 7949 2777
## Intercept[3] 8729 2700
## high_debt_versionfalse 8529 2949
## education_fieldComputerScience 7688 3163
## education_fieldElectricalEngineering 8399 3134
## education_fieldIndustrialengineering 7874 2895
## education_fieldInteractionDesign 9081 2962
## education_fieldNone 8844 2764
## education_fieldSoftwareEngineering 8125 3391
## workplace_pair_programmingfalse 8786 2795
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Visual convergence check for the two-predictor dropout model: trace and
# density plots for every parameter (ask = FALSE renders all pages at once).
plot(droputs2, ask = FALSE)
# Posterior predictive check: bars from 200 posterior draws overlaid on the
# observed task_completion distribution to judge model fit.
pp_check(droputs2, nsamples = 200, type = "bars")
We select the best performing model with three variables.
# Stopping-ratio (sratio) model of task completion extending droputs2 with
# the scenario predictor. The fit is cached on disk and only re-run when the
# model specification changes.
droputs3 <- brm(
  formula = "task_completion ~ 1 + high_debt_version + education_field + workplace_pair_programming + scenario",
  data = as.data.frame(d),
  family = sratio(),
  prior = c(
    prior(normal(0, 0.4), class = "b"),
    prior(normal(-2, 1), class = "Intercept")
  ),
  seed = 20210421,
  file = "fits/droputs3",
  file_refit = "on_change"
)
summary(droputs3)
## Family: sratio
## Links: mu = logit; disc = identity
## Formula: task_completion ~ 1 + high_debt_version + education_field + workplace_pair_programming + scenario
## Data: as.data.frame(d) (Number of observations: 73)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -0.76 0.45 -1.69 0.09 1.00
## Intercept[2] -2.47 0.62 -3.69 -1.31 1.00
## Intercept[3] -2.16 0.61 -3.42 -1.03 1.00
## high_debt_versionfalse 0.15 0.31 -0.45 0.75 1.00
## education_fieldComputerScience -0.30 0.32 -0.93 0.33 1.00
## education_fieldElectricalEngineering -0.06 0.39 -0.81 0.68 1.00
## education_fieldIndustrialengineering -0.12 0.41 -0.92 0.68 1.00
## education_fieldInteractionDesign 0.09 0.38 -0.62 0.84 1.00
## education_fieldNone 0.09 0.39 -0.65 0.85 1.00
## education_fieldSoftwareEngineering 0.35 0.32 -0.30 0.99 1.00
## workplace_pair_programmingfalse 0.45 0.30 -0.13 1.03 1.00
## scenariotickets 0.35 0.31 -0.24 0.94 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 7182 2835
## Intercept[2] 7069 3165
## Intercept[3] 9559 2892
## high_debt_versionfalse 9060 3073
## education_fieldComputerScience 5945 3129
## education_fieldElectricalEngineering 10469 2883
## education_fieldIndustrialengineering 9050 2772
## education_fieldInteractionDesign 9108 2920
## education_fieldNone 7480 3050
## education_fieldSoftwareEngineering 8208 3375
## workplace_pair_programmingfalse 8735 3030
## scenariotickets 8197 3129
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Visual convergence check for the three-predictor dropout model: trace and
# density plots for every parameter (ask = FALSE renders all pages at once).
plot(droputs3, ask = FALSE)
# Posterior predictive check: bars from 200 posterior draws overlaid on the
# observed task_completion distribution to judge model fit.
pp_check(droputs3, nsamples = 200, type = "bars")
We select the best performing model with four variables.
# Stopping-ratio (sratio) model of task completion extending droputs3 with
# the signup-group predictor. The fit is cached on disk and only re-run when
# the model specification changes.
droputs4 <- brm(
  formula = "task_completion ~ 1 + high_debt_version + education_field + workplace_pair_programming + scenario + group",
  data = as.data.frame(d),
  family = sratio(),
  prior = c(
    prior(normal(0, 0.4), class = "b"),
    prior(normal(-2, 1), class = "Intercept")
  ),
  seed = 20210421,
  file = "fits/droputs4",
  file_refit = "on_change"
)
summary(droputs4)
## Family: sratio
## Links: mu = logit; disc = identity
## Formula: task_completion ~ 1 + high_debt_version + education_field + workplace_pair_programming + scenario + group
## Data: as.data.frame(d) (Number of observations: 73)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -0.85 0.48 -1.81 0.07 1.00
## Intercept[2] -2.52 0.67 -3.92 -1.30 1.00
## Intercept[3] -2.21 0.62 -3.49 -1.04 1.00
## high_debt_versionfalse 0.14 0.31 -0.46 0.74 1.00
## education_fieldComputerScience -0.34 0.32 -0.96 0.28 1.00
## education_fieldElectricalEngineering -0.06 0.38 -0.81 0.67 1.00
## education_fieldIndustrialengineering -0.11 0.40 -0.89 0.65 1.00
## education_fieldInteractionDesign 0.11 0.39 -0.64 0.90 1.00
## education_fieldNone 0.11 0.37 -0.64 0.87 1.00
## education_fieldSoftwareEngineering 0.34 0.33 -0.31 0.99 1.00
## workplace_pair_programmingfalse 0.42 0.32 -0.19 1.03 1.00
## scenariotickets 0.36 0.30 -0.24 0.96 1.00
## groupconsultants -0.23 0.33 -0.89 0.42 1.00
## groupfriends -0.06 0.34 -0.73 0.61 1.00
## groupopen -0.30 0.37 -1.03 0.42 1.00
## groupproductMcompany -0.18 0.38 -0.91 0.59 1.00
## groupprofessionalMcontact 0.12 0.39 -0.62 0.87 1.00
## groupstudents 0.15 0.33 -0.47 0.80 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 8446 3164
## Intercept[2] 9349 3021
## Intercept[3] 7858 2534
## high_debt_versionfalse 9121 2694
## education_fieldComputerScience 6925 3064
## education_fieldElectricalEngineering 9261 2712
## education_fieldIndustrialengineering 9590 2709
## education_fieldInteractionDesign 8038 3016
## education_fieldNone 9130 2888
## education_fieldSoftwareEngineering 8407 3267
## workplace_pair_programmingfalse 9179 3070
## scenariotickets 9972 2419
## groupconsultants 8317 3448
## groupfriends 8851 2884
## groupopen 9536 2995
## groupproductMcompany 10938 2968
## groupprofessionalMcontact 9005 2721
## groupstudents 9191 3493
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Visual convergence check for the four-predictor dropout model: trace and
# density plots for every parameter (ask = FALSE renders all pages at once).
plot(droputs4, ask = FALSE)
# Posterior predictive check: bars from 200 posterior draws overlaid on the
# observed task_completion distribution to judge model fit.
pp_check(droputs4, nsamples = 200, type = "bars")
All candidate models look nice; the more complex models are slightly better. We choose the simplest model that is not significantly worse than the best model: dropouts2
To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected we will mainly analyze the \(\beta\) parameters.
# Posterior density areas for the beta (slope) parameters of the selected
# model droputs2; intercepts are omitted because the research question
# concerns the predictor effects.
# Fix: the original chained an empty scale_y_discrete() immediately before
# the labelled one, which made ggplot2 emit a "Scale for 'y' is already
# present" message and discard the first scale — the redundant call is
# removed here.
mcmc_areas(droputs2,
  pars = c(
    "b_high_debt_versionfalse",
    "b_education_fieldComputerScience",
    "b_education_fieldElectricalEngineering",
    "b_education_fieldIndustrialengineering",
    "b_education_fieldInteractionDesign",
    "b_education_fieldNone",
    "b_education_fieldSoftwareEngineering",
    "b_workplace_pair_programmingfalse"
  ),
  prob = 0.95) +
  # Human-readable axis labels; order corresponds to the `pars` vector above.
  scale_y_discrete(labels = c(
    "High debt version: false",
    "Education field: Computer Science",
    "Education field: Electrical Engineering",
    "Education field: Industrial Engineering",
    "Education field: Interaction Design",
    "Education field: None",
    "Education field: Software Engineering",
    "No workplace pair programming"
  )) +
  ggtitle("Beta parameters densities for task completion", subtitle = "Shaded region marks 95% of the density. Line marks the median")
# Prediction grid: both debt versions, with the other two predictors set to
# NA (presumably so posterior_predict treats them as unspecified — TODO
# confirm intended brms behavior for NA predictor values).
# NOTE: the row order of expand.grid determines settings_id below; do not
# reorder these arguments.
post_settings <- expand.grid(
high_debt_version = c("false", "true"),
education_field = NA,
workplace_pair_programming = NA
)
# Draw posterior predictions for each grid row, melt the draws matrix
# (samples x settings) into long format, then join the predictor settings
# back on via the row id so every predicted category carries its debt
# version. Only the columns needed for plotting are kept.
post <- posterior_predict(droputs2, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
left_join(
rowid_to_column(post_settings, var= "settings_id"),
by = "settings_id"
) %>%
select(
estimate,
high_debt_version
)
# Stacked-bar plot of the posterior predictive task-completion distribution
# for each debt version.
# Fix: the superseded mutate_at(..., function(x) case_when(...)) calls are
# replaced with a single mutate() using case_when() directly (modern dplyr
# idiom; identical result).
post %>%
  mutate(
    high_debt_version = case_when(
      high_debt_version == "false" ~ "Low debt",
      high_debt_version == "true" ~ "High debt"
    ),
    # Predicted ordinal categories come back as integers 1..4; map them to
    # the task_completion labels used elsewhere in the document.
    estimate = case_when(
      estimate == 1 ~ "Not submitted",
      estimate == 2 ~ "Does not compile",
      estimate == 3 ~ "Invalid solution",
      estimate == 4 ~ "Completed"
    )
  ) %>%
  ggplot(aes(high_debt_version)) +
  geom_bar(aes(fill = estimate), position = "fill") +
  scale_y_reverse() +
  scale_fill_manual("Legend", values = c("darkblue", "#7070FF", "lightblue", "transparent"), guide = guide_legend(reverse = TRUE)) +
  labs(title = "Task completion") +
  xlab("Debt version") +
  ylab("Ratio of task completion")
We can see that task completion ratios are very similar for both high and low debt and will not proceed to calculate any specific probabilities.