diff --git a/07_final_assignment/baboonSimulation.R b/07_final_assignment/baboonSimulation.R index 3a839df..e446b12 100644 --- a/07_final_assignment/baboonSimulation.R +++ b/07_final_assignment/baboonSimulation.R @@ -11,23 +11,28 @@ enableJIT(3) -######################################################################################## -#### DATA OF ORIGINAL EXPERIMENT -######################################################################################## +############################################################################# +#### DATA OF ORIGINAL EXPERIMENT +############################################################################# monkeydat <- data.frame(Monkey=c("DAN", "ART", "CAU", "DOR", "VIO", "ARI"), - NumTrials=c(56689, 50985, 61142, 49608, 43041, 55407), + NumTrials=c(56689, 50985, 61142, 49608, 43041, + 55407), NumWordsLearned=c(308, 125, 112, 121, 81, 87), - NumNonwordsPresented=c(7832, 7832, 7832, 7832, 7832, 7832), - GeneralAccuracy=c(79.81, 73.41, 72.43, 73.15, 71.55, 71.14), - WordAccuracy=c(80.01, 74.83, 73.15, 79.26, 76.75, 75.38), - NonwordAccuracy=c(79.61, 72.00, 71.72, 67.06, 66.33, 66.90), + NumNonwordsPresented=c(7832, 7832, 7832, 7832, + 7832, 7832), + GeneralAccuracy=c(79.81, 73.41, 72.43, 73.15, + 71.55, 71.14), + WordAccuracy=c(80.01, 74.83, 73.15, 79.26, 76.75, + 75.38), + NonwordAccuracy=c(79.61, 72.00, 71.72, 67.06, 66.33, + 66.90), stringsAsFactors=F) -######################################################################################## -#### DEFINING THE EXPERIMENTAL FUNCTIONS -######################################################################################## +############################################################################# +#### DEFINING THE EXPERIMENTAL FUNCTIONS +############################################################################# Resample <- function(x){ x[sample.int(length(x))] @@ -68,13 +73,15 @@ CreateBlock <- function(data){ result <- data.frame(Stimulus=character(BLOCK_SIZE), StimulusType=factor(character(BLOCK_SIZE), - c("LearnedWord","NewWord","Nonword")), + c("LearnedWord","NewWord", + "Nonword")), stringsAsFactors=F) data <- data[,c("Stimulus","StimulusType")] stopifnot(is.character(data$Stimulus)) newSample <- RandomPick(data[data$StimulusType == "NewWord",], 1) - learnedSample <- RandomPick(data[data$StimulusType == "LearnedWord",], 25) + learnedSample <- RandomPick(data[data$StimulusType == "LearnedWord",], + 25) if( nrow(newSample) > 0 ) { result[1:25,] <- newSample } else { @@ -86,7 +93,8 @@ result[26:50,] <- newSample } - result[51:100,] <- RandomPick(data[data$StimulusType == "Nonword",], 50) + result[51:100,] <- RandomPick(data[data$StimulusType == "Nonword",], + 50) result <- result[sample(nrow(result)),] rownames(result) <- 1:nrow(result) @@ -132,7 +140,8 @@ wordResponses <- newWordTrials[newWordTrials$Response == "Word", ] wordResponseRate <- nrow(wordResponses) / nrow(newWordTrials) if ( wordResponseRate >= 0.8 ){ - data$StimulusType[data$Stimulus == newWordTrials$Stimulus] <- "LearnedWord" + data$StimulusType[(data$Stimulus == + newWordTrials$Stimulus)] <- "LearnedWord" } } } @@ -143,7 +152,9 @@ curTrial <- block[curTrialNumInBlock,] curStim <- curTrial$Stimulus - curStimIsWord <- present(curStim, ifelse(curTrial$StimulusType == "Nonword", "Nonword", "Word")) + curStimIsWord <- present(curStim, + ifelse(curTrial$StimulusType == "Nonword", + "Nonword", "Word")) block$Trial[curTrialNumInBlock] <- curTrialNum block$Response[curTrialNumInBlock] <- curStimIsWord @@ -174,10 +185,12 @@ trials <- result$Trials stimuli <- result$Stimuli expect_equal(nrow(trials), count) - expect_equal(length(stimuli$StimulusType[stimuli$StimulusType == "LearnedWord"]), 102) + expect_equal(length( + stimuli$StimulusType[stimuli$StimulusType == "LearnedWord"]), 102) }) -# The original colSums fails if matrix filters to 1 row and gets implicit casted to a vector +# The original colSums fails if matrix filters to 1 row and gets +# implicit casted to a vector ColSums <- function(x) { if(is.matrix(x)){ return(colSums(x)) @@ -195,7 +208,8 @@ }) MakeMonkey <- function(cueset, outcomeset=c("Word", "Nonword"), - alpha=sqrt(0.001), beta=sqrt(0.001), lambda=1.0, randomRatio=0.2){ + alpha=sqrt(0.001), beta=sqrt(0.001), + lambda=1.0, randomRatio=0.2){ weights <- matrix(0, length(cueset), length(outcomeset)) rownames(weights) <- cueset colnames(weights) <- outcomeset @@ -205,7 +219,8 @@ cueWeights <- weights[cues, outcomeset] totalActivation <- ColSums(cueWeights) - maxActivated <- names(totalActivation[totalActivation == max(totalActivation)]) + maxActivated <- names( + totalActivation[totalActivation == max(totalActivation)]) return(RandomPick(maxActivated,1)) } randomLearner <- function(stim,resp){ @@ -275,8 +290,11 @@ dat$Cues <- orthoCoding(dat$String, grams=2) dat$Frequency <- 1 dat$Stimulus <- dat$Cues - dat$StimulusType <- factor(ifelse(dat$Type == "word", "NewWord", "Nonword"), c("Nonword","NewWord","LearnedWord")) - dat$Outcomes <- factor(ifelse(dat$Type == "word", "Word", "Nonword"), c("Word","Nonword")) + dat$StimulusType <- factor(ifelse(dat$Type == "word", + "NewWord", "Nonword"), + c("Nonword","NewWord","LearnedWord")) + dat$Outcomes <- factor(ifelse(dat$Type == "word", "Word", "Nonword"), + c("Word","Nonword")) dat <- dat[sample(1:nrow(dat)),] cues <- unique(unlist(strsplit(dat$Cues, "_"))) outcomes <- levels(dat$Outcomes) @@ -284,7 +302,8 @@ } runSim <- function(data, trialCount, alpha, beta, lambda=1, randomRatio) { - monkey <- MakeMonkey(data$cues, data$outcomes, alpha, beta, lambda=lambda, randomRatio=randomRatio) + monkey <- MakeMonkey(data$cues, data$outcomes, alpha, beta, lambda=lambda, + randomRatio=randomRatio) trialNum <- round(trialCount) pres <- PresentTrials( trialNum, @@ -293,15 +312,20 @@ } analyseSim <- function(pres){ - learnedWordNum <- nrow(pres$Stimuli[pres$Stimuli$StimulusType == "LearnedWord",]) + learnedWordNum <- nrow( + pres$Stimuli[pres$Stimuli$StimulusType == "LearnedWord",]) trialNum <- nrow(pres$Trials) nonwordTrials <- pres$Trials[pres$Trials$StimulusType == "Nonword",] wordTrials <- pres$Trials[pres$Trials$StimulusType != "Nonword",] presNonwordNum <- length(unique(nonwordTrials$Stimulus)) presWordNum <- length(unique(wordTrials$Stimulus)) - nonwordAcc <- nrow(nonwordTrials[nonwordTrials$Response == "Nonword",]) / nrow(nonwordTrials) - wordAcc <- nrow(nonwordTrials[wordTrials$Response == "Word",]) / nrow(wordTrials) - genAcc <- (nrow(nonwordTrials) * nonwordAcc + nrow(wordTrials) * wordAcc) / trialNum + nonwordAcc <- nrow( + nonwordTrials[nonwordTrials$Response == "Nonword",]) / + nrow(nonwordTrials) + wordAcc <- (nrow(nonwordTrials[wordTrials$Response == "Word",]) / + nrow(wordTrials)) + genAcc <- (nrow(nonwordTrials) * nonwordAcc + nrow(wordTrials) * + wordAcc) / trialNum c("Sim", trialNum, @@ -312,39 +336,18 @@ nonwordAcc) } -simulateAndAnalyse <- function(data, trialCount, alpha=sqrt(0.001), beta=sqrt(0.001), randomRatio=0.2){ +simulateAndAnalyse <- function(data, trialCount, alpha=sqrt(0.001), + beta=sqrt(0.001), randomRatio=0.2){ p <- runSim(data, trialCount, alpha, beta, lambda=1, randomRatio) analyseSim(p) } -######################################################################################## -#### IGNORE THIS BLOCK -######################################################################################## -if (FALSE) { - data <- prepareDat() - - testfunc = function() { - n <- 10 - trialCount <- 50000 - for( a in 0:n) { - k <- 1.0 / n * a - print(k) - #s <- simulateAndAnalyse(data, trialCount, alpha=1, beta=k) - s <- simulateAndAnalyse(data, trialCount, alpha=0.000001, beta=0.00001) - s[1] <- as.character(k) - monkeydat <- rbind(monkeydat, - s) - } - return(monkeydat) - } - - -} -######################################################################################## -#### RUNNING THE PARALLELIZED EXPERIMENTS -######################################################################################## + +############################################################################# +#### RUNNING THE PARALLELIZED EXPERIMENTS +############################################################################# # code for parallelizing the experiment. # Goal: Being able to run without a shared data structure @@ -354,8 +357,9 @@ # in the end. print.iterations <- FALSE #Note: Not meaningful for parallel execution, -#see http://blog.revolutionanalytics.com/2015/02/monitoring-progress-of-a-foreach-parallel-job.html -#for a discussion +# see http://blog.revolutionanalytics.com/2015/02/monitoring-progress- +# of-a-foreach-parallel-job.html +# for a discussion num.cores <- 15 #Can NOT be changed without further code adjustment monkeys.per.core <- 8 #Can NOT be changed without further code adjustment @@ -384,16 +388,21 @@ } get.a <- function(i, index) { - return((a.and.b.combinations[1, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index]) + return((a.and.b.combinations[1, + ((i*monkeys.per.core)-(monkeys.per.core-1)) : + (i*monkeys.per.core)])[index]) } get.b <- function(i, index) { - return((a.and.b.combinations[2, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index]) + return((a.and.b.combinations[2, + ((i*monkeys.per.core)-(monkeys.per.core-1)) : + (i*monkeys.per.core)])[index]) } do.experiment <- function(i) { resultdir <- "results" - dir.create(resultdir, showWarnings = FALSE) # dont show warning if dir exists + # don't show warning if dir exists + dir.create(resultdir, showWarnings = FALSE) filename <- paste(resultdir, "/", "resultdat", i, ".txt", sep="") if(debug) { @@ -424,7 +433,8 @@ resultdat = read.table(filename) if(print.iterations) { - print(paste("--running-- i: ", i, " a: ", a, " b: ", b, " r: ", r, sep="" )) + print(paste("--running-- i: ", i, " a: ", a, + " b: ", b, " r: ", r, sep="" )) } if(debug) { @@ -442,14 +452,16 @@ } else { - s <- simulateAndAnalyse(data, trialCount, alpha=a, beta=b, randomRatio=r ) + s <- simulateAndAnalyse(data, trialCount, alpha=a, beta=b, + randomRatio=r ) s[1] <- as.character(trialCount) resultdat <- rbind(resultdat, c(a, b, s[2:length(s)])) } - #write preliminary result to file -> in case of interruption nothing gets lost + #write preliminary result to file + # -> in case of interruption nothing gets lost write.table(resultdat, file=filename, col.names=names(resultdat)) } @@ -465,9 +477,9 @@ foreach(i=1:num.cores) %dopar% do.experiment(i) -######################################################################################## -#### HARVESTING DATA & VISUALIZATION I -######################################################################################## +############################################################################# +#### HARVESTING DATA & VISUALIZATION I +############################################################################# harvest.data <- function(write.to.file=FALSE, folder="results_backup") { data = data.frame() @@ -542,9 +554,9 @@ } -######################################################################################## -#### MODEL FITTING & VISUALIZATION II -######################################################################################## +############################################################################# +#### MODEL FITTING & VISUALIZATION II +############################################################################# ### GeneralAccuracy m1.g = gam(GeneralAccuracy ~ s(alpha) + s(beta), data=data) @@ -605,8 +617,8 @@ #m2 preferred vis.gam(m1.l, plot.type="contour", color="topo", main="NumWordsLearned") -#note: this plot shows a model which is not justified because m2 is preferred. -#it still is interesting, though. +#note: this plot shows a model which is not justified because +# m2 is preferred. It still is quite interesting, though. #explains the rather strange gam fit hist(data$NumWordsLearned, main="Histogram of NumWordsLearned", @@ -625,9 +637,9 @@ # ok -######################################################################################## -#### OTHER ANALYSIS SNIPPETS -######################################################################################## +############################################################################# +#### OTHER ANALYSIS SNIPPETS +############################################################################# @@ -681,9 +693,9 @@ anova(m2, m3) -######################################################################################## -#### EXPORT RESULTS TO LaTeX -######################################################################################## +############################################################################# +#### EXPORT RESULTS TO LaTeX +############################################################################# data.rounded = data