Times in seconds (user/system/elapsed) for the `tf` and `ts` versions on each machine:

| Machine | Version |     user | system |  elapsed |
|:--------|:--------|---------:|-------:|---------:|
| M1 Max  | tf      |  221.326 |  4.517 |  226.233 |
| M1 Max  | ts      |  221.140 |  4.288 |  225.330 |
| Mac Pro | tf      | 1125.102 | 76.859 | 1474.612 |
| Mac Pro | ts      | 1132.716 | 76.260 | 1493.154 |
Benchmarking ALM Model Fit Functions
```r
pacman::p_load(tidyverse, data.table, microbenchmark, here)

d <- readRDS(here('dPrune-01-19-23.rds'))

dtest <- d %>% filter(expMode %in% c("test-Nf", "test-train-nf")) %>%
  group_by(id, lowBound) %>%
  mutate(nBand = n(), band = bandInt, id = factor(id)) %>%
  group_by(id) %>%
  mutate(nd = n_distinct(lowBound))
# unique(dtest[dtest$nd==4,]$sbjCode) # 7 in wrong condition
dtest <- dtest %>% group_by(id, lowBound) %>% filter(nBand >= 5 & nd == 6)

# for any id that has at least 1 band with nBand < 5, remove all rows with that id.
dtest <- dtest %>% group_by(id) %>%
  filter(!id %in% unique(dtest$id[dtest$nBand < 5]))

dtestAgg <- dtest %>%
  group_by(id, condit, catOrder, feedbackType, vb, band, lowBound, highBound, input) %>%
  mutate(vxCapped = ifelse(vx > 1600, 1600, vx)) %>%
  summarise(vxMean = mean(vx), devMean = mean(dist),
            vxMed = median(vx), devMed = median(dist),
            vxMeanCap = mean(vxCapped), .groups = "keep")

# select first row for each id in d, then create histogram for nTrain
# d %>% group_by(id) %>% slice(1) %>% ggplot(aes(nTrain)) + geom_histogram() + facet_wrap(~condit)

ds <- d %>%
  filter(expMode %in% c("train", "train-Nf", "test-Nf", "test-train-nf")) %>%
  filter(!id %in% unique(dtest$id[dtest$nBand < 5])) %>%
  select(id, condit, catOrder, feedbackType, expMode, trial, gt.train, vb, band,
         bandInt, lowBound, highBound, input, vx, dist, vxb)

dst <- ds %>% filter(expMode == "train", catOrder == "orig")

vTrainTrial <- dst %>% filter(condit == "Varied", gt.train <= 84) %>%
  group_by(gt.train, vb) %>%
  summarise(sdVx = sd(vx), vx = mean(vx), sdDist = sd(dist), dist = mean(dist)) %>%
  group_by(vb) %>%
  mutate(gt.trainBin = cut(gt.train, breaks = 5, labels = c(1:5)))

binTrainTrial <- dst %>% filter(gt.train <= 83) %>%
  group_by(gt.train, vb, condit) %>%
  summarise(sdVx = sd(vx), vx = mean(vx), sdDist = sd(dist), dist = mean(dist)) %>%
  group_by(vb) %>%
  mutate(gt.trainBin = cut(gt.train, breaks = 6, labels = c(1:6)))

tMax <- 84
bandVec <- rep(c(800, 1000, 1200), each = tMax / 3)
bandVec <- bandVec[sample(1:length(bandVec), tMax, replace = FALSE)]

trainTrials <- dst %>% filter(gt.train <= tMax) %>%
  group_by(condit, gt.train, vb, bandInt, input) %>%
  summarise(vx = mean(vx))

input.activation <- function(x.target, c) {
  return(exp(-1 * c * (x.target - inputNodes)^2))
}

output.activation <- function(x.target, weights, c) {
  return(weights %*% input.activation(x.target, c))
}

mean.prediction <- function(x.target, weights, c) {
  probability <- output.activation(x.target, weights, c) / sum(output.activation(x.target, weights, c))
  return(outputNodes %*% probability) # probability-weighted average of output nodes
}

# function to generate exam predictions
exam.prediction <- function(x.target, weights, c, trainVec) {
  # trainVec = sort(unique(x.learning))
  nearestTrain <- trainVec[which.min(abs(trainVec - x.target))]
  aresp <- mean.prediction(nearestTrain, weights, c)
  xUnder <- ifelse(min(trainVec) == nearestTrain, nearestTrain,
                   trainVec[which(trainVec == nearestTrain) - 1])
  xOver <- ifelse(max(trainVec) == nearestTrain, nearestTrain,
                  trainVec[which(trainVec == nearestTrain) + 1])
  mUnder <- mean.prediction(xUnder, weights, c)
  mOver <- mean.prediction(xOver, weights, c)
  exam.output <- round(aresp + ((mOver - mUnder) / (xOver - xUnder)) * (x.target - nearestTrain), 3)
  exam.output
}

update.weights <- function(x.new, y.new, weights, c, lr) {
  y.feedback.activation <- exp(-1 * c * (y.new - outputNodes)^2)
  x.feedback.activation <- output.activation(x.new, weights, c)
  return(weights + lr * (y.feedback.activation - x.feedback.activation) %*% t(input.activation(x.new, c)))
}

train.alm <- function(dat, c = 0.05, lr = 0.5, weights) {
  alm.train <- rep(NA, nrow(dat))
  for (i in 1:nrow(dat)) {
    weights <- update.weights(dat$input[i], dat$vx[i], weights, c, lr)
    resp <- mean.prediction(dat$input[i], weights, c)
    alm.train[i] <- resp
    weights[weights < 0] <- 0
  }
  alm.train
}

wrap_alm <- function(parms, dat, weights, lossFun) {
  c <- parms[1]; lr <- parms[2]
  pred <- train.alm(dat, c = c, lr = lr, weights = weights)
  # sqrt(mean((dat$vx - pred)^2))
  lossFun(dat$vx, pred)
}

wrap_optim <- function(dat, wm, lossFun) {
  bounds_lower <- c(.0000001, .00001)
  bounds_upper <- c(5, 5)
  optim(c(.1, .2),
        fn = wrap_alm, dat = dat, weights = wm, lossFun = lossFun,
        method = "L-BFGS-B",
        lower = bounds_lower, upper = bounds_upper,
        control = list(maxit = 1e4, pgtol = 0, factr = 0))
}

RMSE <- function(x, y) { sqrt(mean((x - y)^2)) }

## First average observed and predicted data into blocks, then compute RMSE
RMSE.tb <- function(x, y, blocks = 6) {
  data.frame(x, y) %>%
    mutate(t = row_number(),
           fitBins = cut(t, breaks = blocks, labels = c(1:blocks))) %>%
    group_by(fitBins) %>%
    summarise(predMean = mean(x), obsMean = mean(y)) %>%
    summarise(RMSE(predMean, obsMean)) %>%
    as.numeric()
}

## Recode RMSE.tb using data.table functions rather than dplyr
RMSE.tb2 <- function(x, y, blocks = 6) {
  data.table(x = x, y = y, t = seq(1, length(x))) %>%
    .[, `:=`(fitBins = cut(t, breaks = ..blocks, labels = c(1:..blocks)))] %>%
    .[, .(predMean = mean(x), obsMean = mean(y)), keyby = .(fitBins)] %>%
    .[, RMSE(predMean, obsMean)] %>%
    as.numeric()
}
```
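The functions above reference `inputNodes`, `outputNodes`, and an initial weight matrix (`wm`) that are defined elsewhere in the project. A minimal sketch of how they might be set up and used, with node spacing and starting values chosen purely for illustration:

```r
# Hypothetical setup: the real node layouts and initial weights are defined
# elsewhere, not in this section.
inputNodes  <- seq(100, 1000, by = 100)   # assumed stimulus (input) nodes
outputNodes <- seq(100, 1600, by = 100)   # assumed velocity (output) nodes
wm <- matrix(.0000001, nrow = length(outputNodes), ncol = length(inputNodes))  # small positive initial weights

tv  <- trainTrials  # assumed: the trial-averaged training data used in the fits below
fit <- wrap_optim(tv %>% filter(condit == "Varied"), wm, lossFun = RMSE.tb2)
fit$par  # best-fitting c and lr
```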
The data.table version (`RMSE.tb2`) seems to be consistently more than 2x faster than the dplyr version (`RMSE.tb`).
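A quick way to check that claim (a minimal sketch, not the original benchmark; the simulated `x` and `y` vectors are stand-ins for predicted and observed velocities):

```r
set.seed(123)
x <- rnorm(840, mean = 1000, sd = 100)  # stand-in predictions
y <- rnorm(840, mean = 1000, sd = 100)  # stand-in observations
microbenchmark(
  dplyr      = RMSE.tb(x, y, blocks = 6),
  data.table = RMSE.tb2(x, y, blocks = 6),
  times = 100
)
```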
Separate fits vs. nesting method vs. split method
```r
nestSplit <- microbenchmark(
  separate = {
    fitVaried <- tv %>% filter(condit == "Varied") %>% wrap_optim(., wm, lossFun = RMSE.tb2)
    fitConstant <- tv %>% filter(condit == "Constant") %>% wrap_optim(., wm, lossFun = RMSE.tb2)
  },
  nestGroups = tv %>% group_by(condit) %>% nest() %>%
    mutate(fit = map(data, ~ wrap_optim(., wm, RMSE.tb2))),
  splitGroups = tv %>% split(.$condit) %>% map(~ wrap_optim(., wm, RMSE.tb2)),
  times = 5
)
knitr::kable(summary(nestSplit), format = "markdown") # 03/02/23 - Mac Pro
```
| expr        |      min |       lq |     mean |   median |       uq |      max | neval | cld |
|:------------|---------:|---------:|---------:|---------:|---------:|---------:|------:|:----|
| separate    | 6.394459 | 6.483235 | 6.709882 | 6.508353 | 7.059420 | 7.103941 |     5 | a   |
| nestGroups  | 6.253850 | 6.457956 | 6.789962 | 6.756942 | 7.044965 | 7.436094 |     5 | a   |
| splitGroups | 6.117184 | 6.455866 | 6.605814 | 6.461572 | 6.640798 | 7.353648 |     5 | a   |
Not much of an effect of the grouping method; the separate, nested, and split approaches all take roughly the same time.
Computing RMSE over raw trials vs. RMSE of blocked training performance
The models are fit about 2x faster when computing RMSE over raw trials. This shouldn't be surprising, since fitting to blocked data requires several additional computations on every loss evaluation (e.g., grouping the trials into blocks and computing block means).
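A sketch of how that comparison might be run (assumed, not the original benchmark code): fit both groups with the trial-level loss (`RMSE`) and with the blocked loss (`RMSE.tb2`), reusing the `tv` and `wm` objects from above.

```r
lossSpeed <- microbenchmark(
  rawTrials = tv %>% split(.$condit) %>% map(~ wrap_optim(., wm, lossFun = RMSE)),
  blocked   = tv %>% split(.$condit) %>% map(~ wrap_optim(., wm, lossFun = RMSE.tb2)),
  times = 5
)
knitr::kable(summary(lossSpeed), format = "markdown")
```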