diff --git a/07_final_assignment/paper/code.tex b/07_final_assignment/paper/code.tex deleted file mode 100644 index e348795..0000000 --- a/07_final_assignment/paper/code.tex +++ /dev/null @@ -1,715 +0,0 @@ -%TODO: simply copy/paste code from R here. -\begin{lstlisting}[language=R] - -library(testthat) -library(ndl) -library(compiler) -library(fields) -library(parallel) -library(doParallel) -library(foreach) -library(itsadug) -library(mgcv) -library(xtable) -enableJIT(3) - - -######################################################################################## -#### DATA OF ORIGINAL EXPERIMENT -######################################################################################## - -monkeydat <- data.frame(Monkey=c("DAN", "ART", "CAU", "DOR", "VIO", "ARI"), - NumTrials=c(56689, 50985, 61142, 49608, 43041, 55407), - NumWordsLearned=c(308, 125, 112, 121, 81, 87), - NumNonwordsPresented=c(7832, 7832, 7832, 7832, 7832, 7832), - GeneralAccuracy=c(79.81, 73.41, 72.43, 73.15, 71.55, 71.14), - WordAccuracy=c(80.01, 74.83, 73.15, 79.26, 76.75, 75.38), - NonwordAccuracy=c(79.61, 72.00, 71.72, 67.06, 66.33, 66.90), - stringsAsFactors=F) - - -######################################################################################## -#### DEFINING THE EXPERIMENTAL FUNCTIONS -######################################################################################## - -Resample <- function(x){ - x[sample.int(length(x))] -} - -test_that("Resample", { - x <- 1:10 - expect_that(length(Resample(x[x > 8])), equals(2)) - expect_that(length(Resample(x[x > 9])), equals(1)) - expect_that(length(Resample(x[x > 10])), equals(0)) -}) - -RandomPick <- function(data, n) { - if ( is.data.frame(data) ){ - data[sample(nrow(data), n, replace=TRUE),] - } else { - data[sample(length(data), n, replace=TRUE)] - } -} - -test_that("RandomPick", { - n <- 10 - - bigger <- 1:(3 * n) - equal <- 1:n - smaller <- c(1) - empty <- c() - df <- data.frame(a=c(1,2,3), b=c(4,5,6)) - - expect_equal(length(RandomPick(bigger, n)), n) - expect_equal(length(RandomPick(equal, n)), n) - expect_equal(length(RandomPick(smaller, n)), n) - expect_equal(length(RandomPick(empty, n)), 0) - expect_equal(nrow(RandomPick(df, n)), n) -}) - - -CreateBlock <- function(data){ - result <- data.frame(Stimulus=character(BLOCK_SIZE), - StimulusType=factor(character(BLOCK_SIZE), - c("LearnedWord","NewWord","Nonword")), - stringsAsFactors=F) - data <- data[,c("Stimulus","StimulusType")] - stopifnot(is.character(data$Stimulus)) - - newSample <- RandomPick(data[data$StimulusType == "NewWord",], 1) - learnedSample <- RandomPick(data[data$StimulusType == "LearnedWord",], 25) - if( nrow(newSample) > 0 ) { - result[1:25,] <- newSample - } else { - result[1:25,] <- learnedSample - } - if( nrow(learnedSample) > 0 ){ - result[26:50,] <- learnedSample - } else { - result[26:50,] <- newSample - } - - result[51:100,] <- RandomPick(data[data$StimulusType == "Nonword",], 50) - - result <- result[sample(nrow(result)),] - rownames(result) <- 1:nrow(result) - result -} - -BLOCK_SIZE <- 100 -test_that("CreateBlock", { - stim <- as.character(1:300) - data <- data.frame(Stimulus=stim, stringsAsFactors=F) - data$StimulusType[1:100] <- "NewWord" - data$StimulusType[101:200] <- "LearnedWord" - data$StimulusType[201:300] <- "Nonword" - - block <- CreateBlock(data) - trialNewWords <- block$Stimulus[ block$StimulusType == "NewWord" ] - trialLearnedWords <- block$Stimulus[ block$StimulusType == "LearnedWord" ] - trialNonwords <- block$Stimulus[ block$StimulusType == "Nonword" ] - - 
expect_that(nrow(block), equals(BLOCK_SIZE)) - expect_that(length(trialNewWords), equals(25)) - expect_that(length(unique(trialNewWords)), equals(1)) - expect_that(length(trialLearnedWords), equals(25)) - expect_that(length(trialNonwords), equals(50)) - # TODO: Decide what should be expected for empty vectors? -}) - - -PresentTrials <- function(trialCount, present, data){ - - trials = data.frame() - - for ( curTrialNum in 1:trialCount ) { - curBlock <- ( (curTrialNum - 1) %/% BLOCK_SIZE) + 1 - curTrialNumInBlock <- ( (curTrialNum - 1) %% BLOCK_SIZE ) + 1 - isNewBlock <- curTrialNumInBlock == 1 - - if ( isNewBlock ) { - if ( curBlock > 1 ){ - newWordTrials <- block[block$StimulusType == "NewWord", ] - if( length(unique(newWordTrials$Stimulus)) == 1 ){ - newWordTrials <- newWordTrials[1, ] - wordResponses <- newWordTrials[newWordTrials$Response == "Word", ] - wordResponseRate <- nrow(wordResponses) / nrow(newWordTrials) - if ( wordResponseRate >= 0.8 ){ - data$StimulusType[data$Stimulus == newWordTrials$Stimulus] <- "LearnedWord" - } - } - } - block <- CreateBlock(data) - block$Block <- curBlock - block$TrialInBlock <- 1:nrow(block) - } - - curTrial <- block[curTrialNumInBlock,] - curStim <- curTrial$Stimulus - curStimIsWord <- present(curStim, ifelse(curTrial$StimulusType == "Nonword", "Nonword", "Word")) - - block$Trial[curTrialNumInBlock] <- curTrialNum - block$Response[curTrialNumInBlock] <- curStimIsWord - - trials <- rbind(trials, block[curTrialNumInBlock, ]) - } - - rownames(trials) <- 1:trialCount - - result <- list(Trials=trials, Stimuli=data) - - return(result) -} - -test_that("PresentTrials", { - stim <- as.character(1:300) - data <- data.frame(Stimulus=stim, stringsAsFactors=F) - data$StimulusType[1:100] <- "NewWord" - data$StimulusType[101:200] <- "LearnedWord" - data$StimulusType[201:300] <- "Nonword" - count <- BLOCK_SIZE * 2.5 - - result <- PresentTrials(count, - function(cue, outcome){ - return(outcome) - }, - data) - trials <- result$Trials - stimuli <- result$Stimuli - expect_equal(nrow(trials), count) - expect_equal(length(stimuli$StimulusType[stimuli$StimulusType == "LearnedWord"]), 102) -}) - -# The original colSums fails if matrix filters to 1 row and gets implicit casted to a vector -ColSums <- function(x) { - if(is.matrix(x)){ - return(colSums(x)) - } else { - return(x) - } -} - -test_that("ColSums",{ - m2x2 <- matrix(1,2,2) - colnames(m2x2) <- c("a","b") - - expect_equal(ColSums(m2x2), c(a=2,b=2)) - expect_equal(ColSums(m2x2[1,]), c(a=1,b=1)) -}) - -MakeMonkey <- function(cueset, outcomeset=c("Word", "Nonword"), - alpha=sqrt(0.001), beta=sqrt(0.001), lambda=1.0, randomRatio=0.2){ - weights <- matrix(0, length(cueset), length(outcomeset)) - rownames(weights) <- cueset - colnames(weights) <- outcomeset - - rate <- function(cue){ - cues <- unlist(strsplit(cue, "_")) - cueWeights <- weights[cues, outcomeset] - totalActivation <- ColSums(cueWeights) - - maxActivated <- names(totalActivation[totalActivation == max(totalActivation)]) - return(RandomPick(maxActivated,1)) - } - randomLearner <- function(stim,resp){ - c("Word","Nonword")[sample(1:2,1)] - } - - realLearner <- function(stim, resp){ - cues <- unlist(strsplit(stim, "_")) - - cueWeights <- weights[cues, outcomeset] - totalActivation <- ColSums(cueWeights) - - for (j in 1:length(cues)) { - if (resp == "Nonword") { - yesType <- "Nonword" - noType <- "Word" - } else if (resp == "Word") { - yesType <- "Word" - noType <- "Nonword" - } else { - stop("Unknown outcome", resp) - } - weights[cues[j],yesType] <<- 
weights[cues[j],yesType] + - alpha * beta * (lambda - totalActivation[yesType]) - weights[cues[j],noType] <<- weights[cues[j],noType] + - alpha * beta * (0 - totalActivation[noType]) - } - monkeyGuess <- rate(stim) - return(monkeyGuess) - } - - learner <- function(stim, resp) { - if( runif(1) < randomRatio ){ - randomLearner(stim,resp) - } else { - realLearner(stim,resp) - } - } - - give_weights <- function(){ - return(weights) - } - - return(list( - GetWeights=give_weights, - Learner=learner, - Rate=rate)) -} - - -test_that("MakeMonkey",{ - cueset <- c("a","b","c") - - monkey <- MakeMonkey(cueset) - for (i in 1:1000){ - monkey$Learner("a_b","Word") - monkey$Learner("b_c","Nonword") - } - - print(monkey$GetWeights()) - expect_equal(monkey$Rate("a"), "Word") - expect_equal(monkey$Rate("c"), "Nonword") -}) - -prepareDat <- function(){ - dat <- read.table("dataDan.txt", header=T, stringsAsFactors=F) - dat$Cues <- orthoCoding(dat$String, grams=2) - dat$Frequency <- 1 - dat$Stimulus <- dat$Cues - dat$StimulusType <- factor(ifelse(dat$Type == "word", "NewWord", "Nonword"), c("Nonword","NewWord","LearnedWord")) - dat$Outcomes <- factor(ifelse(dat$Type == "word", "Word", "Nonword"), c("Word","Nonword")) - dat <- dat[sample(1:nrow(dat)),] - cues <- unique(unlist(strsplit(dat$Cues, "_"))) - outcomes <- levels(dat$Outcomes) - list(cues=cues,outcomes=outcomes,dat=dat) -} - -runSim <- function(data, trialCount, alpha, beta, lambda=1, randomRatio) { - monkey <- MakeMonkey(data$cues, data$outcomes, alpha, beta, lambda=lambda, randomRatio=randomRatio) - trialNum <- round(trialCount) - pres <- PresentTrials( - trialNum, - monkey$Learner, data$dat) - return(pres) -} - -analyseSim <- function(pres){ - learnedWordNum <- nrow(pres$Stimuli[pres$Stimuli$StimulusType == "LearnedWord",]) - trialNum <- nrow(pres$Trials) - nonwordTrials <- pres$Trials[pres$Trials$StimulusType == "Nonword",] - wordTrials <- pres$Trials[pres$Trials$StimulusType != "Nonword",] - presNonwordNum <- length(unique(nonwordTrials$Stimulus)) - presWordNum <- length(unique(wordTrials$Stimulus)) - nonwordAcc <- nrow(nonwordTrials[nonwordTrials$Response == "Nonword",]) / nrow(nonwordTrials) - wordAcc <- nrow(wordTrials[wordTrials$Response == "Word",]) / nrow(wordTrials) - genAcc <- (nrow(nonwordTrials) * nonwordAcc + nrow(wordTrials) * wordAcc) / trialNum - - c("Sim", - trialNum, - learnedWordNum, - presNonwordNum, - genAcc, - wordAcc, - nonwordAcc) -} - -simulateAndAnalyse <- function(data, trialCount, alpha=sqrt(0.001), beta=sqrt(0.001), randomRatio=0.2){ - p <- runSim(data, trialCount, alpha, beta, lambda=1, randomRatio) - analyseSim(p) -} - - -######################################################################################## -#### IGNORE THIS BLOCK -######################################################################################## -if (FALSE) { - data <- prepareDat() - - testfunc = function() { - n <- 10 - trialCount <- 50000 - for( a in 0:n) { - k <- 1.0 / n * a - print(k) - #s <- simulateAndAnalyse(data, trialCount, alpha=1, beta=k) - s <- simulateAndAnalyse(data, trialCount, alpha=0.000001, beta=0.00001) - s[1] <- as.character(k) - monkeydat <- rbind(monkeydat, - s) - } - return(monkeydat) - } - - -} - -######################################################################################## -#### RUNNING THE PARALLELIZED EXPERIMENTS -######################################################################################## - -# code for parallelizing the experiment.
-# Goal: Being able to run without a shared data structure -# and thus avoiding conflicts with critical sections. -# Solution: Reading and printing from and to different files -# for each thread (divide-and-conquer). Results need to be combined -# in the end. - -print.iterations <- FALSE #Note: Not meaningful for parallel execution, -#see http://blog.revolutionanalytics.com/2015/02/monitoring-progress-of-a-foreach-parallel-job.html -#for a discussion - -num.cores <- 15 #Can NOT be changed without further code adjustment -monkeys.per.core <- 8 #Can NOT be changed without further code adjustment - - -start.of.a.and.b <- 0.001 #Can be changed with no further code adjustment -end.of.a.and.b <- 0.300 #Can be changed with no further code adjustment - -debug <- FALSE - - -#8 experiments for each core, 15 cores -# -> 120 Experiments possible. -#This is exactly the number of experiments -#necessary to run 15 x 15 conditions in -#a symmetrical setting (= little Gauss of 15) - - -a.and.b.combinations <- matrix(nrow=2, ncol=0) -aseq <- seq(from=start.of.a.and.b, to=end.of.a.and.b, length.out=15) -step <- aseq[2] - aseq[1] -for(a in aseq) { - for(b in seq(from=start.of.a.and.b, to=a, by=step)) { - a.and.b.combinations <- cbind(a.and.b.combinations, c(a, b)) - } -} - -get.a <- function(i, index) { - return((a.and.b.combinations[1, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index]) -} - -get.b <- function(i, index) { - return((a.and.b.combinations[2, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index]) -} - -do.experiment <- function(i) { - resultdir <- "results" - dir.create(resultdir, showWarnings = FALSE) # dont show warning if dir exists - filename <- paste(resultdir, "/", "resultdat", i, ".txt", sep="") - - if(debug) { - resultdat <- data.frame(alpha=numeric(1), beta=numeric(1), - Time=character(1), - NumWordsLearned=numeric(1), - NumNonwordsPresented=numeric(1), - GeneralAccuracy=numeric(1), - WordAccuracy=numeric(1), - NonwordAccuracy=numeric(1)) - } else { - resultdat <- data.frame(alpha=numeric(1), beta=numeric(1), - NumTrials=numeric(1), - NumWordsLearned=numeric(1), - NumNonwordsPresented=numeric(1), - GeneralAccuracy=numeric(1), - WordAccuracy=numeric(1), - NonwordAccuracy=numeric(1)) - } - write.table(resultdat, file=filename, col.names=names(resultdat)) - - trialCount <- 50000 - r <- 0.65 - for(index in 1:monkeys.per.core) { - - a = get.a(i, index) - b = get.b(i, index) - resultdat = read.table(filename) - - if(print.iterations) { - print(paste("--running-- i: ", i, " a: ", a, " b: ", b, " r: ", r, sep="" )) - } - - if(debug) { - Sys.sleep(1) - current.time = Sys.time() - debug.frame = data.frame(alpha=a, beta=b, - Time=as.character(current.time), - NumWordsLearned=numeric(1), - NumNonwordsPresented=numeric(1), - GeneralAccuracy=numeric(1), - WordAccuracy=numeric(1), - NonwordAccuracy=numeric(1)) - - resultdat <- rbind(resultdat, debug.frame) - - } else { - - s <- simulateAndAnalyse(data, trialCount, alpha=a, beta=b, randomRatio=r ) - s[1] <- as.character(trialCount) - resultdat <- rbind(resultdat, - c(a, b, s[2:length(s)])) - - } - - #write preliminary result to file -> in case of interruption nothing gets lost - write.table(resultdat, file=filename, col.names=names(resultdat)) - } - - resultdat <- resultdat[-1, ] #remove 1st row which is just always 0 - write.table(resultdat, file=filename, col.names=names(resultdat)) - return(resultdat) -} - - -#...RUN, MONKEY, RUN! 
-cl <- makeCluster(num.cores) -registerDoParallel(cl) -foreach(i=1:num.cores) %dopar% do.experiment(i) - - -######################################################################################## -#### HARVESTING DATA & VISUALIZATION I -######################################################################################## - -harvest.data <- function(write.to.file=FALSE, folder="results_backup") { - data = data.frame() - for(i in 1:num.cores) { - d <- read.table(paste(folder, "/", "resultdat", i, ".txt", sep="")) - data <- rbind(data, d) - } - if(write.to.file) { - write.table(data, file="preliminary_results.txt", col.names=names(data)) - } - return(data) -} - - -get.index <- function(sequence, value) { - res <- logical(length(sequence)) - for(i in 1:length(sequence)) { - res[i] <- isTRUE(all.equal(sequence[i], value)) - } - return(which(res)) -} - - -get.matrix <- function(data, value, sequence) { - if(!any(names(data)==value)) { - stop("'value' needs to be part of the data.frame") - } - v.index = which(names(data)==value) - - sequence <- round(sequence, 3) - - z <- matrix(nrow=length(sequence), ncol=length(sequence)) - - for(i in 1:nrow(data)) { - - alpha <- round(data[i, ]$alpha, 3) - beta <- round(data[i, ]$beta, 3) - - a.index <- get.index(sequence, alpha) - b.index <- get.index(sequence, beta) - if(isTRUE(all.equal(length(a.index), 0)) || - isTRUE(all.equal(length(b.index), 0))){ - print("problem: ") - print(alpha) - print(beta) - stop("sequence must contain alpha and beta values") - } - z[a.index, b.index] <- data[i, v.index] - z[b.index, a.index] <- data[i, v.index] - } - return(z) -} - -data = harvest.data(TRUE) - - -mat1 = matrix(rexp(15*15, rate=.1), ncol=15, nrow=15) - -conditions <- c("GeneralAccuracy", "WordAccuracy", - "NonwordAccuracy", "NumWordsLearned") - -for(c in conditions) { - image.plot(x=sort(aseq), - y=sort(aseq), - z=get.matrix(data, c, aseq), - #z = mat1, #alternative while results are not here yet - main=c, - xlab="alpha", - ylab="beta", - useRaster=TRUE, - col = topo.colors(100)) -} - - -######################################################################################## -#### MODEL FITTING & VISUALIZATION II -######################################################################################## - -### GeneralAccuracy -m1.g = gam(GeneralAccuracy ~ s(alpha) + s(beta), data=data) -summary(m1.g) - -m2.g = gam(GeneralAccuracy ~ s(alpha) + s(beta) + ti(alpha, beta), data=data) -summary(m2.g) - -compareML(m1.g, m2.g) -#m1 preferred - -vis.gam(m1.g, plot.type="contour", color="topo", main="GeneralAccuracy") - - - - -### WordAccuracy -m1.w = gam(WordAccuracy ~ s(alpha) + s(beta), data=data) -summary(m1.w) - -m2.w = gam(WordAccuracy ~ s(alpha) + s(beta) + ti(alpha, beta), data=data) -summary(m2.w) - -compareML(m1.w, m2.w) -#m1 preferred - -vis.gam(m1.w, plot.type="contour", color="topo", main="WordAccuracy") - - - -### NonwordAccuracy -m1.n = gam(NonwordAccuracy ~ s(alpha) + s(beta), data=data) -summary(m1.n) - -m2.n = gam(NonwordAccuracy ~ s(alpha) + s(beta) + ti(alpha, beta), data=data) -summary(m2.n) - -compareML(m1.n, m2.n) -#m1 preferred - -vis.gam(m1.n, plot.type="contour", color="topo", main="NonwordAccuracy") - - -### NumWordsLearned -m1.l = gam(NumWordsLearned ~ s(alpha) + s(beta), data=data) -summary(m1.l) - -m2.l = gam(NumWordsLearned ~ s(beta), data=data) -summary(m2.l) - -compareML(m1.l, m2.l) -#m2 preferred - -m3.l = gam(NumWordsLearned ~ te(alpha, beta), data=data) -summary(m3.l) - -compareML(m2.l, m3.l) -#m2 preferred - -vis.gam(m1.l,
plot.type="contour", color="topo", main="NumWordsLearned") -#note: this plot shows a model which is not justified because m2 is preferred. -#it still is interesting, though. - -#explains the rather strange gam fit -hist(data$NumWordsLearned, main="Histogram of NumWordsLearned", - xlab="NumWordsLearned") - - -### model criticism -qqnorm(m1.g$residuals) -qqnorm(m1.w$residuals) -qqnorm(m1.n$residuals) -# mostly ok - -acf(resid(m1.g)) -acf(resid(m1.w)) -acf(resid(m1.n)) -# ok - - -######################################################################################## -#### OTHER ANALYSIS SNIPPETS -######################################################################################## - - - -#0. preprocessing -monkeydat$GeneralAccuracy = as.numeric(monkeydat$GeneralAccuracy) -monkeydat$WordAccuracy = as.numeric(monkeydat$WordAccuracy) -monkeydat$NonwordAccuracy = as.numeric(monkeydat$NonwordAccuracy) -monkeydat$NumTrials = as.numeric(monkeydat$NumTrials) -monkeydat$NumWordsLearned = as.numeric(monkeydat$NumWordsLearned) -monkeydat$NumNonwordsPresented = as.numeric(monkeydat$NumNonwordsPresented) - - -#1. chance level -chance.level <- 0.65 / 2 #0.65 = randomRatio (r) used in do.experiment -chance.level - -max.accuracy <- 1-chance.level -max.accuracy - -max(data$GeneralAccuracy) -max(data$WordAccuracy) -max(data$NonwordAccuracy) - - -#2. correlations -#interesting to look at (& to write in the paper): -cor(data$WordAccuracy, data$NonwordAccuracy) -cor(monkeydat$WordAccuracy, monkeydat$NonwordAccuracy) -#why is this correlation so different??? - -cor(data$GeneralAccuracy, data$NonwordAccuracy) -cor(monkeydat$GeneralAccuracy, monkeydat$NonwordAccuracy) - -cor(data$GeneralAccuracy, data$WordAccuracy) -cor(monkeydat$GeneralAccuracy, monkeydat$WordAccuracy) - - -#3. linear model -#fitting a linear model: -m1 = lm(GeneralAccuracy ~ alpha, data=data) -summary(m1) - -m2 = lm(GeneralAccuracy ~ alpha + beta, data=data) -summary(m2) - -anova(m1, m2) - -m3 = lm(GeneralAccuracy ~ alpha * beta, data=data) -summary(m3) - -anova(m2, m3) - - -######################################################################################## -#### EXPORT RESULTS TO LaTeX -######################################################################################## - - -data.rounded = data -data.rounded$alpha = round(data.rounded$alpha, 3) -data.rounded$beta = round(data.rounded$beta, 3) -data.rounded$GeneralAccuracy = round(data.rounded$GeneralAccuracy, 2) -data.rounded$WordAccuracy = round(data.rounded$WordAccuracy, 2) -data.rounded$NonwordAccuracy = round(data.rounded$NonwordAccuracy, 2) -rownames(data.rounded) = NULL -colnames(data.rounded)[3] = "#Trials" -colnames(data.rounded)[4] = "#LearnedW" -colnames(data.rounded)[5] = "#PresW" -colnames(data.rounded)[6] = "GenAcc" -colnames(data.rounded)[7] = "WAcc" -colnames(data.rounded)[8] = "NWAcc" -View(data.rounded) - -xtable(data.rounded[1:40, ]) -xtable(data.rounded[41:80, ]) -xtable(data.rounded[81:120, ]) - -# EOF - - -\end{lstlisting} -\newpage \ No newline at end of file diff --git a/07_final_assignment/paper/main.tex b/07_final_assignment/paper/main.tex index c0cad0a..c0056e1 100644 --- a/07_final_assignment/paper/main.tex +++ b/07_final_assignment/paper/main.tex @@ -71,15 +71,10 @@ \onecolumn \input{result_tables.tex} - - -\section{Code} -\input{code.tex} - +\lstinputlisting[language=R]{../baboonSimulation.R} \printbibliography{} - \end{document} %%% Local Variables: %%% mode: latex