---
title: ABC-rejection models
author: Thomas Gorman
date: "`r Sys.Date()`"
code-fold: true
code-tools: true
execute:
  warning: false
  eval: true
  cache: true
---
```{r}
pacman::p_load(dplyr, purrr, tidyr, ggplot2, data.table, here, patchwork, conflicted,
               stringr, future, furrr, knitr, reactable, flextable, ggstance, htmltools,
               ggdist, ggh4x) # ggdist and ggh4x added: stat_pointinterval() and facet_nested_wrap() are used below
conflict_prefer_all("dplyr", quiet = TRUE)
options(scipen = 999)
walk(c("Display_Functions", "fun_alm", "fun_indv_fit", "fun_model"),
     ~ source(here::here(paste0("Functions/", .x, ".R"))))
ds <- readRDS(here::here("data/e1_md_11-06-23.rds")) |> as.data.table()
fd <- readRDS(here("data/e1_08-21-23.rds"))
test <- fd |> filter(expMode2 == "Test")
testAvg <- test %>% group_by(id, condit, vb, bandInt, bandType, tOrder) %>%
  summarise(nHits = sum(dist == 0), vx = mean(vx), dist = mean(dist), sdist = mean(sdist),
            n = n(), Percent_Hit = nHits / n)
nbins <- 3
trainAvg <- fd |> filter(expMode2 == "Train") |> group_by(id) |>
  mutate(tr = trial, x = vb,
         Block = case_when(expMode2 == "Train" ~ cut(tr, breaks = seq(1, max(tr), length.out = nbins + 1),
                                                     include.lowest = TRUE, labels = FALSE),
                           expMode2 == "Test" ~ 4)) |>
  group_by(id, condit, vb, x, Block) |>
  summarise(dist = mean(dist), y = mean(vx))
# df with trial count columns for each level of expMode, for each id
id_n <- ds |> group_by(id, condit) |>
  summarise(nTest = n_distinct(tr[expMode2 == "Test"]),
            nTrain = n_distinct(tr[expMode2 == "Train"])) |>
  mutate(nTotal = nTest + nTrain)
input_layer <<- output_layer <<- c(100, 350, 600, 800, 1000, 1200)
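# input/output node locations shared (as globals) by the sourced ALM/EXAM simulation functions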
ids <- c(1, 2, 4, 5, 6, 7, 8, 10, 11, 12, 13)
ids2 <- c(1, 66, 36)
ids3 <- c(20, 71, 101, 4, 76, 192)
idsBad <- c(76, 192, 101)
#file_name <- "n_iter_300_ntry_3000_0800"
#file_name <- "n_iter_100_ntry_200_4509"
file_name <- "n_iter_100_ntry_400_3247"
file_name <- "n_iter_200_ntry_300_5354"
# list.files(here('data/abc_reject'))
# (grep("Train",list.files(here(paste0('data/abc_reject/',file_name)),
# pattern="EXAM_Test",full.names = TRUE),
# invert=TRUE, value=TRUE))
ind_fits <- map(list.files(here('data/abc_reject', file_name), full.names = TRUE), readRDS)
# ind_fits <- map(list.files(here('data/abc_reject/n_iter_2000_ntry_10_2918'), full.names=TRUE), readRDS)
ind_fits_df <- ind_fits |> map(~ list(dat = .x[[1]], Model = .x[["Model"]], Fit_Method = .x[["Fit_Method"]]))
ind_fits_df <- ind_fits_df |> map(~ rbindlist(.x$dat) |> mutate(Model = .x$Model, Fit_Method = .x$Fit_Method)) |> rbindlist()
avg_samples <- ind_fits_df |> group_by(id, condit, Model, Fit_Method) |>
  summarise(inc = max(inc_count), iter_sum = sum(iter_count))
mean(avg_samples$iter_sum)
min(avg_samples$iter_sum)
max(avg_samples$iter_sum)
sum(avg_samples$iter_sum)
avg_samples |> group_by(Model, Fit_Method) |> summarise(mean(inc), mean(iter_sum))
#run_params <- tibble::lst(file_name, cMean=ind_fits[[1]]$cMean, cSig=ind_fits[[1]]$cSig,
run_params <- keep(ind_fits[[1]], ~ any(class(.x) %in% c("character", "numeric")))
run_params <- tibble(!!!run_params) |> select(-Model, -Fit_Method)
extract_info <- function(raw_names, run_params) {
  fname <- tools::file_path_sans_ext(basename(raw_names))
  n_samp <- str_extract(raw_names, "(?<=_)\\d+(?=_)")
  ntry_ <- str_extract(raw_names, "(?<=ntry_)\\d+")
  type <- 'ss'
  data.frame(run_params, n_samp, ntry_, type, fname)
}
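# e.g., "n_iter_200_ntry_300_5354" -> n_samp = "200", ntry_ = "300"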
run_info <- extract_info(file_name, run_params)
```
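For orientation, the sketch below shows the general shape of an ABC-rejection step assumed to underlie the saved fits loaded above. The actual sampler lives in the sourced `Functions/` scripts; `sim_error_fn`, the priors, and the tolerance here are illustrative placeholders, not the values used to generate the fits.

```{r}
#| eval: false
# Minimal ABC-rejection sketch (illustrative only). `sim_error_fn` is a
# hypothetical function that simulates the model at candidate parameters and
# returns a discrepancy between simulated and observed data.
abc_reject_sketch <- function(sim_error_fn, n_keep = 100, max_try = 1e4, tol = 500) {
  kept <- list()
  n_try <- 0
  while (length(kept) < n_keep && n_try < max_try) {
    n_try <- n_try + 1
    cand_c  <- runif(1, 1e-6, 5)   # assumed prior on the generalization parameter c
    cand_lr <- runif(1, 1e-6, 10)  # assumed prior on the learning rate lr
    err <- sim_error_fn(cand_c, cand_lr)
    if (err < tol) {               # rejection step: keep only candidates within tolerance
      kept[[length(kept) + 1]] <- data.frame(c = cand_c, lr = cand_lr, mean_error = err)
    }
  }
  if (length(kept) == 0) return(NULL)
  cbind(do.call(rbind, kept), iter_count = n_try)
}
```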
```{r}
generate_data <- function(Model, post_samples, data, num_samples = 1, return_dat = "train_data, test_data") {
  # Filter data for the specific id without invalidating selfref
  sbj_data <- copy(data[id == post_samples$id[1]])
  simulation_function <- if (Model == "EXAM") full_sim_exam else full_sim_alm
  target_data <- switch(return_dat,
    "test_data" = copy(sbj_data[expMode2 == "Test"]),
    "train_data" = copy(sbj_data[expMode2 == "Train"]),
    "train_data, test_data" = copy(sbj_data[expMode2 %in% c("Test", "Train")]))
  post_samples <- post_samples[order(mean_error)][1:num_samples, .(c, lr, mean_error, rank = .I)]
  simulated_data_list <- lapply(1:nrow(post_samples), function(i) {
    params <- post_samples[i]
    sim_data <- simulation_function(sbj_data, params$c, params$lr, input_layer = input_layer,
                                    output_layer = output_layer, return_dat = return_dat)
    sim_data_dt <- data.table(id = sbj_data$id[1], condit = sbj_data$condit[1],
                              expMode2 = target_data$expMode2, Model = Model, tr = target_data$tr,
                              y = target_data$y, x = target_data$x, c = params$c,
                              lr = params$lr, mean_error = params$mean_error, rank = i,
                              pred = sim_data)
    return(sim_data_dt)
  })
  result_dt <- rbindlist(simulated_data_list)
  setcolorder(result_dt, c("id", "condit", "expMode2", "tr", "c", "lr", "x", "y", "pred"))
  return(result_dt)
}
future::plan(multisession)
nestSbjModelFit <- ind_fits_df %>% nest(.by = c(id, Model, Fit_Method))
# organize test data predictions
post_dat <- nestSbjModelFit |> mutate(pp = furrr::future_pmap(list(id, Model, Fit_Method, data), ~ {
  generate_data(..2, ..4 |> mutate(id = ..1), ds, num_samples = 20, return_dat = "test_data")
})) |>
  select(Fit_Method, pp) |>
  unnest(pp) |> filter(expMode2 == "Test") |> as.data.table()
post_dat_avg <- post_dat |> group_by(id, condit, Model, Fit_Method, x, c, lr, rank) |>
  summarise(y = mean(y), pred = mean(pred), error = y - pred) |> as.data.table()
post_dat_avg2 <- post_dat_avg |>
  group_by(id, condit, Model, Fit_Method, c, lr, rank) |>
  summarise(y = mean(y), pred = mean(pred), error = y - pred) |> as.data.table() |> arrange(rank)
map_id <- post_dat |> group_by(id, condit, Model, Fit_Method) |>
  filter(rank == 1) |> slice_head(n = 1) |> select(id, condit, Fit_Method, Model, c, lr, mean_error, rank) |>
  left_join(id_n, by = c("id", "condit"))
# compute BIC for each row of map_id; p=2 for c and lr
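# note: log(mean_error) stands in for the log-likelihood term here, so BIC is only comparable across models fit to the same data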
map_id <- map_id |> mutate(n = fifelse(Fit_Method == "Test", nTest, fifelse(Fit_Method == "Train", nTrain, nTotal)),
                           BIC = -2 * log(mean_error) + log(n) * 2)
bic_id <- map_id %>%
  group_by(id, Fit_Method) %>%
  arrange(Model) %>%
  summarize(BIC_diff = diff(BIC),
            .groups = 'drop') %>%
  left_join(map_id, by = c("id", "Fit_Method")) %>%
  select(id, Fit_Method, Model, BIC, BIC_diff)
setorder(post_dat_avg, id, x, rank)
post_dat_l <- melt(post_dat_avg, id.vars = c("id", "condit", "Model", "Fit_Method", "x", "c", "lr", "rank", "error"),
                   measure.vars = c("pred", "y"), variable.name = "Resp", value.name = "val")
post_dat_l[, Resp := fifelse(Resp == "y", "Observed",
                             fifelse(Model == "ALM", "ALM", "EXAM"))]
setorder(post_dat_l, id, Resp)
#rm(post_dat_avg)
# organize training data predictions
pd_train <- nestSbjModelFit |> mutate(pp = furrr::future_pmap(list(id, Model, Fit_Method, data), ~ {
  generate_data(..2, ..4 |> mutate(id = ..1), ds, num_samples = 20, return_dat = "train_data")
})) |>
  select(Fit_Method, pp) |>
  unnest(pp) |> as.data.table() |> filter(expMode2 == "Train")
nbins <- 3
pd_train <- pd_train |> group_by(id, condit, Model, Fit_Method) |>
  mutate(Block = cut(tr, breaks = seq(1, max(tr), length.out = nbins + 1), include.lowest = TRUE, labels = FALSE))
setorder(pd_train, id, x, Block, rank)
pd_train_l <- melt(pd_train, id.vars = c("id", "condit", "Model", "Block", "Fit_Method", "x", "c", "lr", "rank"),
                   measure.vars = c("pred", "y"), variable.name = "Resp", value.name = "val") |> as.data.table()
pd_train_l[, Resp := fifelse(Resp == "y", "Observed",
                             fifelse(Model == "ALM", "ALM", "EXAM"))]
setorder(pd_train_l, id, Block, Resp)
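# band distance: 0 if the response lands inside the correct band [x, x+200], otherwise distance to the nearer band edge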
pd_train_l <- pd_train_l |>
  mutate(dist = case_when(
    val >= x & val <= x + 200 ~ 0,
    val < x ~ abs(x - val),
    val > x + 200 ~ abs(val - (x + 200)),
    TRUE ~ NA_real_
  ))
plan(sequential)
# theta_map <- post_dat_avg |> group_by(condit,Model,Fit_Method) |> summarize(c=median(c),lr=median(lr))
#
# map <- simulation_function(sbj_data <- ds |> theta_map$c[1], , params$lr, input_layer = input_layer,
# output_layer = output_layer, return_dat = return_dat)
# AIC and BIC
# mc2 <- post_dat |> filter(rank==1) |>
# group_by(id,condit,Model,Fit_Method) |>
# mutate(e2=abs(y-pred),n=n(),k=2) |>
# group_by(id,condit,Model,Fit_Method,x) |>
# summarise(y=mean(y), pred=mean(pred), e2=mean(e2), mean_error=first(mean_error),n=first(n),k=first(k)) |>
# group_by(id,condit,Model,Fit_Method) |>
# summarise(e2=mean(e2), mean_error=first(mean_error),n=first(n),k=first(k)) |>
# mutate(AIC=2*k+n*mean_error, BIC=log(n)*k+n*mean_error)
# Single run extraction:
# exam_test <- ind_fits_df |> filter(Model == "EXAM", Fit_Method == "Test")
# post_dat_trial <- exam_test %>% split(f =c(.$id), drop=TRUE) |>
# map(~generate_data(.x$Model, .x, ds, num_samples = 15, return_dat="train_data, test_data")) |>
# rbindlist() |>
# filter(expMode2=="Test")
#
# post_dat_avg <- post_dat_trial |> group_by(id,condit,x,c,lr,rank) |>
# summarise(y = mean(y),pred = mean(pred)) |> arrange(id,x,rank)
#post_dat_l |> filter(Model == "EXAM", Fit_Method == "Test",id==1) |> arrange(rank)
# head(post_dat_l)
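# rank subjects within each condition by the MAE of their best (rank 1) parameter set, separately per model and fit method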
bestTestEXAM <- post_dat_l |>
  filter(Fit_Method == "Test", rank == 1, Resp == "EXAM") |>
  group_by(id, condit, Model, Fit_Method) |>
  summarise(mae = mean(abs(error))) |> arrange(mae) |> ungroup() |>
  mutate(group_rank = row_number(), .by = c(condit))
bestTestALM <- post_dat_l |>
  filter(Fit_Method == "Test", rank == 1, Resp == "ALM") |>
  group_by(id, condit, Model, Fit_Method) |>
  summarise(mae = mean(abs(error))) |> arrange(mae) |> ungroup() |>
  mutate(group_rank = row_number(), .by = c(condit))
bestTrainALM <- post_dat_l |>
  filter(Fit_Method == "Train", rank == 1, Resp == "ALM") |>
  group_by(id, condit, Model, Fit_Method) |>
  summarise(mae = mean(abs(error))) |> arrange(mae) |> ungroup() |>
  mutate(group_rank = row_number(), .by = c(condit))
bestTrainEXAM <- post_dat_l |>
  filter(Fit_Method == "Train", rank == 1, Resp == "EXAM") |>
  group_by(id, condit, Model, Fit_Method) |>
  summarise(mae = mean(abs(error))) |> arrange(mae) |> ungroup() |>
  mutate(group_rank = row_number(), .by = c(condit))
bestTtEXAM <- post_dat_l |>
  filter(Fit_Method == "Test_Train", rank == 1, Resp == "EXAM") |>
  group_by(id, condit, Model, Fit_Method) |>
  summarise(mae = mean(abs(error))) |> arrange(mae) |> ungroup() |>
  mutate(group_rank = row_number(), .by = c(condit))
```
## Posterior Average Table
```{r fig.width=12, fig.height=17}
#| eval: true
#| tbl-cap: "Posterior Distribution"
#| tbl-subcap:
#| - "Full Posterior"
#| - "Best Posterior"
# post_tabs <- abc_tables(post_dat_l)
# post_tabs$et_sum |> gt::gt()
post_tabs <- abc_tables(post_dat,post_dat_l)
# tables with fit error
# post_tabs$agg_full |> flextable::tabulator(rows=c("Fit_Method","Model"), columns=c("condit"),
# `ME` = as_paragraph(mean_error)) |> as_flextable() |>
# set_caption("Mean from full_posterior")
# post_tabs$agg_best |> flextable::tabulator(rows=c("Fit_Method","Model"), columns=c("condit"),
# `ME` = as_paragraph(mean_error)) |> as_flextable() |>
# set_caption("Mean from best parameters")
post_tabs$agg_pred_full |> flextable::tabulator(rows=c("Fit_Method","Model"), columns=c("condit"),
`ME` = as_paragraph(mean_error)) |> as_flextable() |>
set_caption("Mean from full_posterior")
post_tabs$agg_pred_best |> flextable::tabulator(rows=c("Fit_Method","Model"), columns=c("condit"),
                         `ME` = as_paragraph(mean_error)) |> as_flextable() |>
  set_caption("Mean from best parameters")
```
## Interactive Test Predictions Table
```{r}
selectFilter <- function(tableId, style = "width: 100%; height: 100%;") {
  function(values, name) {
    tags$select(
      # Set to undefined to clear the filter
      onchange = sprintf("
        const value = event.target.value
        Reactable.setFilter('%s', '%s', value === '__ALL__' ? undefined : value)
      ", tableId, name),
      # "All" has a special value to clear the filter, and is the default option
      tags$option(value = "__ALL__", "All"),
      lapply(unique(values), tags$option),
      "aria-label" = sprintf("Filter %s", name),
      style = style
    )
  }
}
post_tabs$agg_x_full |>
  left_join(post_tabs$agg_x_best |> rename("best_error" = mean_error, "best_pred" = pred),
            by = c("condit", "Model", "Fit_Method", "x", "y")) |>
  arrange(desc(condit), x, Fit_Method) |>
  mutate(x = as.character(x)) |>
  reactable::reactable(
    columns = list(condit = colDef(name = "Condit", filterInput = selectFilter("my-tbl")),
                   Model = colDef(name = "Model", filterInput = selectFilter("my-tbl")),
                   Fit_Method = colDef(name = "Fit_Method", filterInput = selectFilter("my-tbl"),
                                       filterMethod = JS("(rows, columnId, filterValue) => {
                                         return rows.filter(row => row.values[columnId] === filterValue)
                                       }")),
                   x = colDef(name = "x", filterInput = selectFilter("my-tbl"),
                              filterMethod = JS("(rows, columnId, filterValue) => {
                                return rows.filter(row => row.values[columnId] === filterValue)
                              }"))),
    elementId = "my-tbl",
    defaultPageSize = 15,
    filterable = TRUE)
```
## Train Predictions
```{r}
pt_train <- abc_train_tables(pd_train, pd_train_l)
pt_train$agg_pred_full |> flextable::tabulator(rows = c("Fit_Method", "Model"), columns = c("condit"),
                         `ME` = as_paragraph(mean_error)) |> as_flextable() |>
  set_caption("Mean from full_posterior")
pt_train$block_pred_full |>
  flextable::tabulator(rows = c("Fit_Method", "Model", "Block"), columns = c("condit"),
                       `Observed` = as_paragraph(y),
                       `Predicted` = as_paragraph(pred),
                       `ME` = as_paragraph(mean_error)) |> as_flextable() |>
  set_caption("Mean from full_posterior")
pt_train$block_pred_full |> filter(condit == "Varied") |>
  flextable::tabulator(rows = c("Fit_Method", "Block"), columns = c("Model"),
                       # `Observed` = as_paragraph(y),
                       `Predicted` = as_paragraph(pred),
                       `ME` = as_paragraph(mean_error)) |> as_flextable() |>
  set_caption("Mean from full_posterior")
pt_train$block_pred_full |>
  flextable::tabulator(rows = c("Fit_Method", "Block"), columns = c("condit", "Model"),
                       # `Observed` = as_paragraph(y),
                       `Predicted` = as_paragraph(pred),
                       `ME` = as_paragraph(mean_error)) |> as_flextable() |>
  set_caption("Mean from full_posterior")
pt_train$block_pred_full |>
  flextable::tabulator(rows = c("Fit_Method", "Block"), columns = c("condit", "Model"),
                       # `Observed` = as_paragraph(y),
                       `ME` = as_paragraph(mean_error)) |> as_flextable() |>
  set_caption("Mean from full_posterior")
best_indices <- pt_train$block_pred_full_x %>%
  filter(!(Fit_Method == "Test")) |>
  nest(data = -condit) %>%
  mutate(indices = map(data, ~ {
    .x %>%
      select(-y, -pred, -mean_error) %>%
      pivot_wider(names_from = Model, values_from = best) %>%
      ungroup() |>
      select(ALM, EXAM) %>%
      pmap(.f = function(ALM, EXAM) {
        which(c(ALM, EXAM) == 1)
      })
  })) %>%
  select(condit, indices)
pt_train$block_pred_full_x |> filter(condit == "Varied", !(Fit_Method == "Test")) |>
  flextable::tabulator(rows = c("Fit_Method", "Block", "x"), columns = c("Model"),
                       # `Observed` = as_paragraph(y),
                       `Predicted` = as_paragraph(pred),
                       `ME` = as_paragraph(mean_error)) |>
  as_flextable() %>%
  apply_best_formatting(., pull(filter(best_indices, condit == "Varied"), indices) %>% .[[1]]) |>
  set_caption("Varied Train Predictions")
pt_train$block_pred_full_x |> filter(condit == "Constant", !(Fit_Method == "Test")) |>
  flextable::tabulator(rows = c("Fit_Method", "Block", "x"), columns = c("Model"),
                       # `Observed` = as_paragraph(y),
                       `Predicted` = as_paragraph(pred),
                       `ME` = as_paragraph(mean_error)) |>
  as_flextable() %>%
  apply_best_formatting(., pull(filter(best_indices, condit == "Constant"), indices) %>% .[[1]]) |>
  set_caption("Constant Train Predictions")
# pull(filter(best_indices,condit=="Varied"),indices) %>% .[[1]]
```
## Posterior Predictive:
```{r, fig.width=12, fig.height=15}
#| eval: true
group_predictive_plots(post_dat_l)
testAvg |> ggplot(aes(x=vx)) + geom_density() + facet_wrap(~vb)
post_dat_avg |> filter(rank==1) |> ggplot(aes(x=pred)) + geom_density() + facet_wrap(~x)
post_dat_avg |> filter(rank==1) |> ggplot(aes(x=pred)) +
geom_density() + geom_density(data=post_dat_avg, aes(x=y), color="red") +
facet_wrap(condit~x,ncol=6)
post_dat_avg |> filter(Model=="EXAM",Fit_Method=="Test") |> ggplot(aes(x=pred)) +
geom_density() + geom_density(data=post_dat_avg, aes(x=y), color="red") +
facet_wrap(condit~x,ncol=6)
post_dat_avg |> filter(Model=="ALM",Fit_Method=="Test") |> ggplot(aes(x=pred)) +
geom_density() + geom_density(data=post_dat_avg, aes(x=y), color="red") +
facet_wrap(condit~x,ncol=6)
post_dat_avg |> filter(Fit_Method=="Test") |> ggplot(aes(x=pred)) +
geom_density(aes(fill=Model),alpha=.5) + geom_density(data=post_dat_avg, aes(x=y), color="black",alpha=2) +
facet_wrap(condit~x,ncol=6)
post_dat_avg |> filter(Fit_Method=="Test_Train") |> ggplot(aes(x=pred)) +
geom_density(aes(fill=Model),alpha=.5) + geom_density(data=post_dat_avg, aes(x=y), color="black",alpha=2) +
facet_wrap(condit~x,ncol=6)
post_dat_avg |> filter(Fit_Method=="Train",pred>0,pred<2000) |> ggplot(aes(x=pred)) +
geom_density(aes(fill=Model),alpha=.5) + geom_density(data=post_dat_avg, aes(x=y), color="black",alpha=2) +
facet_wrap(condit~x,ncol=6)
```
### Alt grouping of Posterior Predictive
```{r, fig.width=12, fig.height=15}
group_predictive_plots2(post_dat_l)
```
```{r}
post_dat_avg |> group_by(id, condit, Fit_Method, x) |>
  ggplot(aes(x = x, y = error, fill = Model)) +
  stat_bar +
  facet_wrap(condit ~ Fit_Method, scales = "free") +
  labs(title = "Model Residuals - full posterior")
post_dat_avg |> group_by(id, condit, Fit_Method, x) |>
  filter(rank == 1) |>
  ggplot(aes(x = x, y = error, fill = Model)) +
  stat_bar +
  facet_wrap(condit ~ Fit_Method, scales = "free") +
  labs(title = "Model Residuals - best parameters")
```
```{r fig.width=11, fig.height=12}
plot_indv_posterior(ind_fits_df |> mutate(Group=condit))
plot_indv_posterior(post_dat |> filter(rank==1) |> mutate(Group=condit))
# plot join density of c and lr
# post_dat |> filter(rank==1, c<.001) |>
# ggplot(aes(x=c, y=lr, color=condit)) + geom_point() + facet_wrap(~Model+Fit_Method)
post_dat |> filter(c<.001) |> ggplot(aes(x=c,y=condit,fill=condit)) + geom_boxploth(width=.5) + facet_wrap(~Model+Fit_Method,scales="free")
post_dist <- post_dat_avg %>%
group_by(id, condit, Model, Fit_Method, rank) %>%
slice_head(n = 1) %>%
ungroup() %>% # Make sure to remove the grouping by rank before the next group_by
group_by(id, condit, Model, Fit_Method) %>%
summarize(across(c:lr, list(mean = mean, sd = sd, se = ~sd(.)/sqrt(n()))), .groups = 'keep')
post_dist |>
group_by(condit, Model, Fit_Method) %>%
mutate(across(c_mean, list(gmean = mean, gsd = sd, gse = ~sd(.)/sqrt(n())))) |>
filter(c_mean<(c_mean_gmean+(.5*c_mean_gse))) |>
ggplot(aes(y=id, x = c_mean)) +
stat_pointinterval() +
ggh4x::facet_nested_wrap(Fit_Method~condit~Model,scales="free")
post_dist |>
group_by(condit, Model, Fit_Method) %>%
mutate(across(c_mean, list(gmean = median, gsd = sd, gse = ~sd(.)/sqrt(n())))) |>
filter(c_mean<(c_mean_gmean+(.5*c_mean_gse))) |>
ggplot(aes( x = c_mean,fill=condit)) + geom_density() +
ggh4x::facet_nested_wrap(Fit_Method~Model,scales="free")
post_dist |>
group_by(condit, Model, Fit_Method) %>%
mutate(across(c_mean, list(gmean = median, gsd = sd, gse = ~sd(.)/sqrt(n())))) |>
filter(c_mean<(c_mean_gmean+(.5*c_mean_gse))) |>
ggplot(aes( x = log(c_mean),fill=condit)) + geom_density() +
ggh4x::facet_nested_wrap(Fit_Method~Model,scales="free")
post_dat_avg %>%
group_by(id, condit, Model, Fit_Method, rank) %>%
slice_head(n = 1) |>
ggplot(aes( x = log(c),fill=condit)) + geom_density() +
ggh4x::facet_nested_wrap(Fit_Method~Model,scales="free")
post_dat_avg %>%
group_by(id, condit, Model, Fit_Method, rank) %>%
slice_head(n = 1) |>
ggplot(aes(y=Fit_Method, x = log(c),col=condit)) + stat_pointinterval(position=position_dodge(.2)) +
ggh4x::facet_nested_wrap(~Model,scales="free")
post_dat_avg %>%
group_by(id, condit, Model, Fit_Method, rank) %>%
slice_head(n = 1) |>
ggplot(aes(y=Fit_Method, x = lr,col=condit)) + stat_pointinterval(position=position_dodge(.2)) +
ggh4x::facet_nested_wrap(~Model,scales="free")
# post_dat_avg %>%
# group_by(id, condit, Model, Fit_Method, rank) %>%
# slice_head(n = 1) |>
# ggplot(aes(y=Fit_Method, x = (lr),fill=condit)) +
# stat_halfeye(position=position_dodge(.2)) +
# ggh4x::facet_nested_wrap(~Model,scales="free")
#
```
## Model Comparison:
```{r, fig.width=12, fig.height=13}
#| eval: true
indv_best_plots(post_dat_l)
post_dat_l |> group_by(condit,Model,Fit_Method,rank) |>
summarise(mean_error=mean(abs(error)), n=n()) |>
ggplot(aes(x=rank,y=mean_error,fill=Model))+geom_col()+facet_wrap(~Fit_Method,scales="free")
post_dat_l |> group_by(condit,Model,Fit_Method,rank,x) |>
summarise(mean_error=mean(abs(error)), n=n()) |>
ggplot(aes(x=rank,y=mean_error,fill=Model))+geom_col()+facet_wrap(Fit_Method~x,ncol=6,scales="free")
group_best_plots(post_dat_l)
```
## Individual Predictive Plots
```{r fig.width=11, fig.height=12}
indv_predictive_plots(post_dat_l, ids2)
indv_predictive_plots(post_dat_l, idsBad)
```
### Subject 1
```{r fig.width=11, fig.height=12}
indv_predictive_dist((post_dat_l |> filter(rank<=200)),ind_fits_df, sbj=list(1))
abc_tables(post_dat |> filter(id==1))$agg_full |> flextable() |> set_caption("Mean from full posterior")
abc_tables(post_dat |> filter(id==1))$agg_best |> flextable() |> set_caption("Mean from best parameters")
post_dat_l |> filter(id==1) |> group_by(condit,Model,Fit_Method,rank) |>
summarise(mean_error=mean(abs(error)), n=n()) |>
ggplot(aes(x=rank,y=mean_error,fill=Model))+geom_col()+facet_wrap(~Fit_Method)
# plot_indv_posterior(ind_fits_df |> filter(id==1))
# ind_fits_df |> filter(id==1, Fit_Method=="Test Only", Model=="EXAM") |> pull(c) |> unique()
```
### Subject 36
```{r fig.width=11, fig.height=12}
indv_predictive_dist(post_dat_l,ind_fits_df, sbj=list(36))
#abc_tables(post_dat_l |> filter(id==36))$et_sum |> gt::gt()
abc_tables(post_dat |> filter(id==36))$agg_full |> flextable() |> set_caption("Mean from full posterior")
abc_tables(post_dat |> filter(id==36))$agg_best |> flextable() |> set_caption("Mean from best parameters")
# plot_indv_posterior(ind_fits_df |> filter(id==1))
# ind_fits_df |> filter(id==1, Fit_Method=="Test Only", Model=="EXAM") |> pull(c) |> unique()
```
## More Comparisons
```{r fig.width=11, fig.height=12}
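# per-band model errors for each subject; best = 1 flags the model with the lower error in that band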
best_id_x <- post_dat |> group_by(id,condit,Model,Fit_Method,x) |>
mutate(e2=(y-pred)) |>
summarise(y=mean(y), pred=mean(pred), mean_error=mean(e2),abs_me=abs(mean_error)) |>
group_by(id,condit,Fit_Method,x) |> mutate(best=ifelse(mean_error==min(mean_error),1,0)) |>
group_by(id,condit,Fit_Method,Model) |> mutate(n_best=sum(best))
best_id <- best_id_x |> group_by(id,condit,Fit_Method,Model) |>
summarise(mean_error=mean(mean_error), n_best=first(n_best),abs_me=mean(abs(mean_error)))
lowest_error_model <- best_id %>%
group_by(id, condit,Fit_Method) %>%
summarise(Best_Model = Model[which.min(mean_error)],
n_best = n_best[which.min(mean_error)],
Lowest_error = min(mean_error),
differential = min(mean_error) - max(mean_error)) %>%
ungroup()
error_difference <- best_id %>%
select(id, condit, Model,Fit_Method, mean_error,abs_me) %>%
pivot_wider(names_from = Model, values_from = c(abs_me,mean_error)) %>%
mutate(Error_difference = (mean_error_ALM - mean_error_EXAM), abs_error_dif = (abs_me_ALM - abs_me_EXAM))
full_comparison <- lowest_error_model |> left_join(error_difference, by=c("id","condit","Fit_Method")) |>
group_by(condit,Fit_Method,Best_Model) |> mutate(nGrp=n(), model_rank = nGrp - rank(Error_difference) ) |>
arrange(Fit_Method,-Error_difference)
best_id |> filter(Fit_Method=="Test_Train") |>
arrange(mean_error) |>
ungroup() |>
mutate(id = reorder(id, mean_error)) %>%
ggplot(aes(y=id,x=mean_error,fill=Model))+
geom_col()+
ggh4x::facet_grid2(~condit,axes="all",scales="free_y", independent = "y")
best_id |> filter(Fit_Method=="Test_Train") |>
ungroup() |>
mutate(id = reorder(id, abs_me)) %>%
ggplot(aes(y=id,x=abs_me,fill=Model))+
geom_col()+
ggh4x::facet_grid2(~condit,axes="all",scales="free_y", independent = "y")
full_comparison |> filter(Fit_Method=="Test_Train") |>
ungroup() |>
mutate(id = reorder(id, abs_error_dif)) %>%
ggplot(aes(y=id,x=abs_error_dif))+
geom_col()+
ggh4x::facet_grid2(~condit,axes="all",scales="free_y", independent = "y")
full_comparison |> filter(Fit_Method=="Test_Train") |>
ungroup() |>
mutate(id = reorder(id, Error_difference)) %>%
ggplot(aes(y=id,x=Error_difference))+
geom_col()+
ggh4x::facet_grid2(~condit,axes="all",scales="free_y", independent = "y")
d <- testAvg |> left_join(full_comparison, by=c("id","condit")) |> filter(Fit_Method=="Test_Train")
d |> ggplot(aes(x=vb,y=dist,fill=condit)) + stat_bar + facet_wrap(Fit_Method~Best_Model,ncol=2)
d |>
group_by(condit,Fit_Method,Best_Model) |>
mutate(nGrp2=n()) |>
filter(abs(Error_difference)>15) |>
ggplot(aes(x=vb,y=dist,fill=condit)) +
stat_bar + facet_wrap(Fit_Method~Best_Model,ncol=2)
d |> group_by(condit,Fit_Method,Best_Model) %>% tally() |> mutate(n=n/6)
d |> group_by(condit,Fit_Method,Best_Model) |> filter(abs(Error_difference)>15) |> tally() |> mutate(n=n/6)
d |> filter(bandInt==100) |> group_by(condit,Fit_Method) |> summarise(m=mean(Error_difference),
sd=sd(Error_difference),
n=n(),se=sd/sqrt(n))
# head(map_id,2)
# # A tibble: 2 × 13
# # Groups: id, condit, Model, Fit_Method [2]
# id condit Fit_Method Model c lr mean_error rank nTest nTrain nTotal n BIC
# <fct> <fct> <chr> <chr> <dbl> <dbl> <dbl> <int> <int> <int> <int> <int> <dbl>
# 1 1 Varied Test ALM 0.00000165 5.32 238. 1 63 86 149 63 -6.80
# 2 1 Varied Test_Train ALM 0.0000634 5.60 276. 1 63 86 149 149 -6.24
```
```{r fig.width=11, fig.height=12}
#full_comparison |> round_tibble(1) |> reactable(filterable=T,defaultPageSize=15)
ggplot(full_comparison, aes(x = Best_Model, fill = condit)) +
geom_bar(position = "dodge") +
facet_wrap(~Fit_Method)+
labs(title = "Distribution of Subjects Best Fit by Model",
x = "Model with Lowest Error",
y = "Count of Subjects") +
theme_minimal() +
scale_fill_brewer(palette = "Set1")
# Scatter plot showing the differential in mean error
ggplot(full_comparison, aes(x = Error_difference, y = condit, color = Best_Model)) +
geom_point(alpha=.5) +
stat_pointinterval()+
facet_wrap(~Fit_Method)+
labs(title = "Differential in Mean Error Between Models",
x = "Error Difference (ALM - EXAM)",
y = "Condition") +
theme_minimal() +
scale_color_brewer(palette = "Set1")
# ggplot(full_comparison, aes(x = Error_difference)) +
# geom_histogram(aes(fill=Best_Model),bins = 40, alpha = 0.7) +
# geom_vline(aes(xintercept = mean(Error_difference)), color = "red", linetype = "dashed", size = 1) +
# facet_wrap(~Fit_Method)+
# labs(
# title = "Distribution of Error Differential (ALM - EXAM)",
# x = "Error Differential",
# y = "Frequency"
# ) +
# theme_minimal()
best_exam <- full_comparison |> filter(Best_Model=="EXAM")
best_alm <- full_comparison |> filter(Best_Model=="ALM")
d <- testAvg |> left_join(full_comparison, by=c("id","condit"))
d |> ggplot(aes(x=vb,y=dist,fill=condit)) + stat_bar + facet_wrap(Fit_Method~Best_Model,ncol=2)
d |> filter(abs(Error_difference)>20) |>
ggplot(aes(x=vb,y=dist,fill=condit)) +
stat_bar + facet_wrap(Fit_Method~Best_Model,ncol=2)
d |> ggplot(aes(x=vb,y=vx,fill=condit)) + stat_bar + facet_wrap(Fit_Method~Best_Model,ncol=2)
d |> filter(Fit_Method=="Test") |>
ggplot(aes(x=Error_difference,y=dist,col=condit)) + geom_point() + facet_wrap(~vb,scales="free",ncol=2)
d |> group_by(id,condit,Fit_Method) |> summarise(Error_difference=mean(Error_difference),dist=mean(dist)) |>
ggplot(aes(x=Error_difference,y=dist,col=condit)) + geom_point() + facet_wrap(~Fit_Method,scales="free")
# ds |> filter(expMode2=="Test", id %in% best_exam$id) |>
# group_by(id,condit,x) |> summarise(y=mean(y), dist=mean(abs(x-y))) |>
# mutate(x=as.factor(x)) |>
# ggplot(aes(x=x,y=dist,fill=condit)) + stat_bar
```
## Best Test - EXAM Fits
```{r fig.width=11, fig.height=12}
strong_fits <- full_comparison |> filter(model_rank<=1)
strong_fits |> filter(Fit_Method=="Test",Best_Model=="EXAM") |> pull(id) %>% indv_predictive_plots(post_dat_l, .)
```
## Best Test - ALM Fits
```{r fig.width=11, fig.height=12}
strong_fits <- full_comparison |> filter(model_rank<=1)
strong_fits |> filter(Fit_Method=="Test",Best_Model=="ALM") |> pull(id) %>% indv_predictive_plots(post_dat_l, .)
```
## Best Test_Train - EXAM Fits
```{r fig.width=11, fig.height=12}
strong_fits <- full_comparison |> filter(model_rank<=1)
strong_fits |> filter(Fit_Method=="Test_Train",Best_Model=="EXAM") |> pull(id) %>% indv_predictive_plots(post_dat_l, .)
```
## Best Test_Train - ALM Fits
```{r fig.width=11, fig.height=12}
strong_fits <- full_comparison |> filter(model_rank<=1)
strong_fits |> filter(Fit_Method=="Test_Train",Best_Model=="ALM") |> pull(id) %>% indv_predictive_plots(post_dat_l, .)
```
## Best Train - EXAM Fits
```{r fig.width=11, fig.height=12}
strong_fits <- full_comparison |> filter(model_rank<=1)
strong_fits |> filter(Fit_Method=="Train",Best_Model=="EXAM") |> pull(id) %>% indv_predictive_plots(post_dat_l, .)
```
## Best Train - ALM Fits
```{r fig.width=11, fig.height=12}
strong_fits <- full_comparison |> filter(model_rank<=1)
strong_fits |> filter(Fit_Method=="Train",Best_Model=="ALM") |> pull(id) %>% indv_predictive_plots(post_dat_l, .)
```
## Learning Curves
```{r}
#| fig-cap: "Learning Curves"
#| fig-width: 11
#| fig-height: 13
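# learning curves: observed vs. model-predicted responses across training blocks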
pd_train_l |> ggplot(aes(x=Block,y=dist,col=Resp))+
geom_point( stat = "summary", fun = mean) +
stat_summary( geom = "line", fun = mean) +
stat_summary(geom="errorbar",fun.data=mean_se,width=.1,alpha=.4) +
#facet_wrap(condit~x,scales="free")
ggh4x::facet_nested_wrap(Fit_Method~condit~x,scales="free") +
#coord_cartesian(ylim = c(600, 1300)) +
labs(title="Full Posterior")
pd_train_l |> filter(rank==1) |> ggplot(aes(x=Block,y=val,col=Resp))+
geom_point( stat = "summary", fun = mean) +
stat_summary( geom = "line", fun = mean) +
stat_summary(geom="errorbar",fun.data=mean_se,width=.1,alpha=.4) +
#facet_wrap(condit~x,scales="free")
ggh4x::facet_nested_wrap(Fit_Method~condit~x,scales="free") +
coord_cartesian(ylim = c(600, 1300)) +labs(title="Best Parameters")
pd_train_l |> filter(Fit_Method=="Train") |>
mutate(x=as.factor(x), Resp=as.factor(Resp), Block=as.factor(Block)) |>
ggplot(aes(x=x,y=val,fill=Block))+
stat_bar +
#facet_wrap(condit~x,scales="free")
facet_wrap(condit~Resp,scales="free",ncol=3) +
coord_cartesian(ylim = c(600, 1300))
# pd_train_l |> filter(id %in% c(1,33,66, 36,76), Fit_Method=="Train") |>
# ggplot(aes(x=Block,y=val,col=Resp))+
# geom_point( stat = "summary", fun = mean) +
# stat_summary( geom = "line", fun = mean) +
# stat_summary(geom="errorbar",fun.data=mean_se,width=.1,alpha=.4) +
# #facet_wrap(condit~x,scales="free")
# facet_wrap(id~x,scales="free",ncol=3) +
# coord_cartesian(ylim = c(600, 1300))
pd_train_l |>
#filter(id %in% c(1,33,66, 36,76), Fit_Method=="Train") |>
mutate(x=as.factor(x), Resp=as.factor(Resp), Block=as.factor(Block)) |>
ggplot(aes(x=x,y=dist,fill=Block))+
stat_bar +
#facet_wrap(condit~x,scales="free")
#coord_cartesian(ylim = c(600, 1300)) +
facet_wrap(id~Resp,scales="free",ncol=3)
```
```{r}
#| eval: false
post_dat |> ggplot(aes(x = Model, y = mean_error, fill = Model)) + geom_col() + facet_wrap(~Fit_Method)
post_dat |> ggplot(aes(x = Model, y = mean_error, fill = Model)) + geom_col() + facet_wrap(condit ~ Fit_Method)
```
```{r}
# post_tabs <- map(post_tabs, ~cbind(.x,run_info) |> mutate(fn=file_name))
# post_tabs$agg_pred_full_train <- pt_train$agg_pred_full
# post_tabs$block_pred_full <- pt_train$block_pred_full
# post_tabs$block_pred_full_x <- pt_train$block_pred_full_x
# # path1 = "../../../data/abc_tabs"
# saveRDS(post_tabs, paste0(path1, "/", tools::file_path_sans_ext(basename(file_name)),"_post_tab", ".rds"))
# saveRDS(post_tabs, paste0(tools::file_path_sans_ext(basename(file_name)),"_post_tab", ".rds"))
```