library(testthat)
library(ndl)
library(compiler)
library(fields)
library(parallel)
library(doParallel)
library(foreach)
library(itsadug)
library(mgcv)
enableJIT(3)
########################################################################################
#### DATA OF ORIGINAL EXPERIMENT
########################################################################################
monkeydat <- data.frame(Monkey=c("DAN", "ART", "CAU", "DOR", "VIO", "ARI"),
NumTrials=c(56689, 50985, 61142, 49608, 43041, 55407),
NumWordsLearned=c(308, 125, 112, 121, 81, 87),
NumNonwordsPresented=c(7832, 7832, 7832, 7832, 7832, 7832),
GeneralAccuracy=c(79.81, 73.41, 72.43, 73.15, 71.55, 71.14),
WordAccuracy=c(80.01, 74.83, 73.15, 79.26, 76.75, 75.38),
NonwordAccuracy=c(79.61, 72.00, 71.72, 67.06, 66.33, 66.90),
stringsAsFactors=F)
########################################################################################
#### DEFINING THE EXPERIMENTAL FUNCTIONS
########################################################################################
Resample <- function(x){
x[sample.int(length(x))]
}
test_that("Resample", {
x <- 1:10
expect_that(length(Resample(x[x > 8])), equals(2))
expect_that(length(Resample(x[x > 9])), equals(1))
expect_that(length(Resample(x[x > 10])), equals(0))
})
RandomPick <- function(data, n) {
if ( is.data.frame(data) ){
data[sample(nrow(data), n, replace=TRUE),]
} else {
data[sample(length(data), n, replace=TRUE)]
}
}
test_that("RandomPick", {
n <- 10
bigger <- 1:(3 * n)
equal <- 1:n
smaller <- c(1)
empty <- c()
df <- data.frame(a=c(1,2,3), b=c(4,5,6))
expect_equal(length(RandomPick(bigger, n)), n)
expect_equal(length(RandomPick(equal, n)), n)
expect_equal(length(RandomPick(smaller, n)), n)
expect_equal(length(RandomPick(empty, n)), 0)
expect_equal(nrow(RandomPick(df, n)), n)
})
CreateBlock <- function(data){
result <- data.frame(Stimulus=character(BLOCK_SIZE),
StimulusType=factor(character(BLOCK_SIZE),
c("LearnedWord","NewWord","Nonword")),
stringsAsFactors=F)
data <- data[,c("Stimulus","StimulusType")]
stopifnot(is.character(data$Stimulus))
newSample <- RandomPick(data[data$StimulusType == "NewWord",], 1)
learnedSample <- RandomPick(data[data$StimulusType == "LearnedWord",], 25)
if( nrow(newSample) > 0 ) {
result[1:25,] <- newSample
} else {
result[1:25,] <- learnedSample
}
if( nrow(learnedSample) > 0 ){
result[26:50,] <- learnedSample
} else {
result[26:50,] <- newSample
}
result[51:100,] <- RandomPick(data[data$StimulusType == "Nonword",], 50)
result <- result[sample(nrow(result)),]
rownames(result) <- 1:nrow(result)
result
}
BLOCK_SIZE <- 100
test_that("CreateBlock", {
stim <- as.character(1:300)
data <- data.frame(Stimulus=stim, stringsAsFactors=F)
data$StimulusType[1:100] <- "NewWord"
data$StimulusType[101:200] <- "LearnedWord"
data$StimulusType[201:300] <- "Nonword"
block <- CreateBlock(data)
trialNewWords <- block$Stimulus[ block$StimulusType == "NewWord" ]
trialLearnedWords <- block$Stimulus[ block$StimulusType == "LearnedWord" ]
trialNonwords <- block$Stimulus[ block$StimulusType == "Nonword" ]
expect_that(nrow(block), equals(BLOCK_SIZE))
expect_that(length(trialNewWords), equals(25))
expect_that(length(unique(trialNewWords)), equals(1))
expect_that(length(trialLearnedWords), equals(25))
expect_that(length(trialNonwords), equals(50))
# TODO: Decide what should be expected for empty vectors?
})
PresentTrials <- function(trialCount, present, data){
trials = data.frame()
for ( curTrialNum in 1:trialCount ) {
curBlock <- ( (curTrialNum - 1) %/% BLOCK_SIZE) + 1
curTrialNumInBlock <- ( (curTrialNum - 1) %% BLOCK_SIZE ) + 1
isNewBlock <- curTrialNumInBlock == 1
if ( isNewBlock ) {
if ( curBlock > 1 ){
newWordTrials <- block[block$StimulusType == "NewWord", ]
if( length(unique(newWordTrials$Stimulus)) == 1 ){
newWordTrials <- newWordTrials[1, ]
wordResponses <- newWordTrials[newWordTrials$Response == "Word", ]
wordResponseRate <- nrow(wordResponses) / nrow(newWordTrials)
if ( wordResponseRate >= 0.8 ){
data$StimulusType[data$Stimulus == newWordTrials$Stimulus] <- "LearnedWord"
}
}
}
block <- CreateBlock(data)
block$Block <- curBlock
block$TrialInBlock <- 1:nrow(block)
}
curTrial <- block[curTrialNumInBlock,]
curStim <- curTrial$Stimulus
curStimIsWord <- present(curStim, ifelse(curTrial$StimulusType == "Nonword", "Nonword", "Word"))
block$Trial[curTrialNumInBlock] <- curTrialNum
block$Response[curTrialNumInBlock] <- curStimIsWord
trials <- rbind(trials, block[curTrialNumInBlock, ])
}
rownames(trials) <- 1:trialCount
result <- list(Trials=trials, Stimuli=data)
return(result)
}
test_that("PresentTrials", {
stim <- as.character(1:300)
data <- data.frame(Stimulus=stim, stringsAsFactors=F)
data$StimulusType[1:100] <- "NewWord"
data$StimulusType[101:200] <- "LearnedWord"
data$StimulusType[201:300] <- "Nonword"
count <- BLOCK_SIZE * 2.5
result <- PresentTrials(count,
function(cue, outcome){
return(outcome)
},
data)
trials <- result$Trials
stimuli <- result$Stimuli
expect_equal(nrow(trials), count)
expect_equal(length(stimuli$StimulusType[stimuli$StimulusType == "LearnedWord"]), 102)
})
# The original colSums fails if matrix filters to 1 row and gets implicit casted to a vector
ColSums <- function(x) {
if(is.matrix(x)){
return(colSums(x))
} else {
return(x)
}
}
test_that("ColSums",{
m2x2 <- matrix(1,2,2)
colnames(m2x2) <- c("a","b")
expect_equal(ColSums(m2x2), c(a=2,b=2))
expect_equal(ColSums(m2x2[1,]), c(a=1,b=1))
})
MakeMonkey <- function(cueset, outcomeset=c("Word", "Nonword"),
alpha=sqrt(0.001), beta=sqrt(0.001), lambda=1.0, randomRatio=0.2){
weights <- matrix(0, length(cueset), length(outcomeset))
rownames(weights) <- cueset
colnames(weights) <- outcomeset
rate <- function(cue){
cues <- unlist(strsplit(cue, "_"))
cueWeights <- weights[cues, outcomeset]
totalActivation <- ColSums(cueWeights)
maxActivated <- names(totalActivation[totalActivation == max(totalActivation)])
return(RandomPick(maxActivated,1))
}
randomLearner <- function(stim,resp){
c("Word","Nonword")[sample(1:2,1)]
}
realLearner <- function(stim, resp){
cues <- unlist(strsplit(stim, "_"))
cueWeights <- weights[cues, outcomeset]
totalActivation <- ColSums(cueWeights)
for (j in 1:length(cues)) {
if (resp == "Nonword") {
yesType <- "Nonword"
noType <- "Word"
} else if (resp == "Word") {
yesType <- "Word"
noType <- "Nonword"
} else {
stop("Unknown outcome", resp)
}
weights[cues[j],yesType] <<- weights[cues[j],yesType] +
alpha * beta * (lambda - totalActivation[yesType])
weights[cues[j],noType] <<- weights[cues[j],noType] +
alpha * beta * (0 - totalActivation[noType])
}
monkeyGuess <- rate(stim)
return(monkeyGuess)
}
learner <- function(stim, resp) {
if( runif(1) < randomRatio ){
randomLearner(stim,resp)
} else {
realLearner(stim,resp)
}
}
give_weights <- function(){
return(weights)
}
return(list(
GetWeights=give_weights,
Learner=learner,
Rate=rate))
}
test_that("MakeMonkey",{
cueset <- c("a","b","c")
monkey <- MakeMonkey(cueset)
for (i in 1:1000){
monkey$Learner("a_b","Word")
monkey$Learner("b_c","Nonword")
}
print(monkey$GetWeights())
expect_equal(monkey$Rate("a"), "Word")
expect_equal(monkey$Rate("c"), "Nonword")
})
prepareDat <- function(){
dat <- read.table("dataDan.txt", header=T, stringsAsFactors=F)
dat$Cues <- orthoCoding(dat$String, grams=2)
dat$Frequency <- 1
dat$Stimulus <- dat$Cues
dat$StimulusType <- factor(ifelse(dat$Type == "word", "NewWord", "Nonword"), c("Nonword","NewWord","LearnedWord"))
dat$Outcomes <- factor(ifelse(dat$Type == "word", "Word", "Nonword"), c("Word","Nonword"))
dat <- dat[sample(1:nrow(dat)),]
cues <- unique(unlist(strsplit(dat$Cues, "_")))
outcomes <- levels(dat$Outcomes)
list(cues=cues,outcomes=outcomes,dat=dat)
}
runSim <- function(data, trialCount, alpha, beta, lambda=1, randomRatio) {
monkey <- MakeMonkey(data$cues, data$outcomes, alpha, beta, lambda=lambda, randomRatio=randomRatio)
trialNum <- round(trialCount)
pres <- PresentTrials(
trialNum,
monkey$Learner, data$dat)
return(pres)
}
analyseSim <- function(pres){
learnedWordNum <- nrow(pres$Stimuli[pres$Stimuli$StimulusType == "LearnedWord",])
trialNum <- nrow(pres$Trials)
nonwordTrials <- pres$Trials[pres$Trials$StimulusType == "Nonword",]
wordTrials <- pres$Trials[pres$Trials$StimulusType != "Nonword",]
presNonwordNum <- length(unique(nonwordTrials$Stimulus))
presWordNum <- length(unique(wordTrials$Stimulus))
nonwordAcc <- nrow(nonwordTrials[nonwordTrials$Response == "Nonword",]) / nrow(nonwordTrials)
wordAcc <- nrow(nonwordTrials[wordTrials$Response == "Word",]) / nrow(wordTrials)
genAcc <- (nrow(nonwordTrials) * nonwordAcc + nrow(wordTrials) * wordAcc) / trialNum
c("Sim",
trialNum,
learnedWordNum,
presNonwordNum,
genAcc,
wordAcc,
nonwordAcc)
}
simulateAndAnalyse <- function(data, trialCount, alpha=sqrt(0.001), beta=sqrt(0.001), randomRatio=0.2){
p <- runSim(data, trialCount, alpha, beta, lambda=1, randomRatio)
analyseSim(p)
}
########################################################################################
#### IGNORE THIS BLOCK
########################################################################################
if (FALSE) {
data <- prepareDat()
testfunc = function() {
n <- 10
trialCount <- 50000
for( a in 0:n) {
k <- 1.0 / n * a
print(k)
#s <- simulateAndAnalyse(data, trialCount, alpha=1, beta=k)
s <- simulateAndAnalyse(data, trialCount, alpha=0.000001, beta=0.00001)
s[1] <- as.character(k)
monkeydat <- rbind(monkeydat,
s)
}
return(monkeydat)
}
}
########################################################################################
#### RUNNING THE PARALLELIZED EXPERIMENTS
########################################################################################
# code for parallelizing the experiment.
# Goal: Being able to run without a shared data structure
# and thus avoiding conflicts with critical sections.
# Solution: Reading and printing from and to different files
# for each thread (divide-and-conquer). Results need to be combined
# in the end.
print.iterations <- FALSE #Note: Not meaningful for parallel execution,
#see http://blog.revolutionanalytics.com/2015/02/monitoring-progress-of-a-foreach-parallel-job.html
#for a discussion
num.cores <- 15 #Can NOT be changed without further code adjustment
monkeys.per.core <- 8 #Can NOT be changed without further code adjustment
start.of.a.and.b <- 0.001 #Can be changed with no further code adjustment
end.of.a.and.b <- 0.300 #Can be changed with no further code adjustment
debug <- FALSE
#8 experiments for each core, 15 cores
# -> 120 Experiments possible.
#This is exactly the number of experiments
#necessary to run 15 x 15 conditions in
#a symmetrical setting (= little Gauss of 15)
a.and.b.combinations <- matrix(nrow=2, ncol=0)
aseq <- seq(from=start.of.a.and.b, to=end.of.a.and.b, length.out=15)
step <- aseq[2] - aseq[1]
for(a in aseq) {
for(b in seq(from=start.of.a.and.b, to=a, by=step)) {
a.and.b.combinations <- cbind(a.and.b.combinations, c(a, b))
}
}
get.a <- function(i, index) {
return((a.and.b.combinations[1, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index])
}
get.b <- function(i, index) {
return((a.and.b.combinations[2, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index])
}
do.experiment <- function(i) {
resultdir <- "results"
dir.create(resultdir, showWarnings = FALSE) # dont show warning if dir exists
filename <- paste(resultdir, "/", "resultdat", i, ".txt", sep="")
if(debug) {
resultdat <- data.frame(alpha=numeric(1), beta=numeric(1),
Time=character(1),
NumWordsLearned=numeric(1),
NumNonwordsPresented=numeric(1),
GeneralAccuracy=numeric(1),
WordAccuracy=numeric(1),
NonwordAccuracy=numeric(1))
} else {
resultdat <- data.frame(alpha=numeric(1), beta=numeric(1),
NumTrials=numeric(1),
NumWordsLearned=numeric(1),
NumNonwordsPresented=numeric(1),
GeneralAccuracy=numeric(1),
WordAccuracy=numeric(1),
NonwordAccuracy=numeric(1))
}
write.table(resultdat, file=filename, col.names=names(resultdat))
trialCount <- 50000
r <- 0.65
for(index in 1:monkeys.per.core) {
a = get.a(i, index)
b = get.b(i, index)
resultdat = read.table(filename)
if(print.iterations) {
print(paste("--running-- i: ", i, " a: ", a, " b: ", b, " r: ", r, sep="" ))
}
if(debug) {
Sys.sleep(1)
current.time = Sys.time()
debug.frame = data.frame(alpha=a, beta=b,
Time=as.character(current.time),
NumWordsLearned=numeric(1),
NumNonwordsPresented=numeric(1),
GeneralAccuracy=numeric(1),
WordAccuracy=numeric(1),
NonwordAccuracy=numeric(1))
resultdat <- rbind(resultdat, debug.frame)
} else {
s <- simulateAndAnalyse(data, trialCount, alpha=a, beta=b, randomRatio=r )
s[1] <- as.character(trialCount)
resultdat <- rbind(resultdat,
c(a, b, s[2:length(s)]))
}
#write preliminary result to file -> in case of interruption nothing gets lost
write.table(resultdat, file=filename, col.names=names(resultdat))
}
resultdat <- resultdat[-1, ] #remove 1st row which is just always 0
write.table(resultdat, file=filename, col.names=names(resultdat))
return(resultdat)
}
#...RUN, MONKEY, RUN!
cl <- makeCluster(num.cores)
registerDoParallel(cl)
foreach(i=1:num.cores) %dopar% do.experiment(i)
########################################################################################
#### HARVESTING DATA & VISUALIZATION I
########################################################################################
harvest.data <- function(write.to.file=FALSE, folder="results_backup") {
data = data.frame()
for(i in 1:num.cores) {
d <- read.table(paste(folder, "resultdat", i, ".txt", sep=""))
data <- rbind(data, d)
}
if(write.to.file) {
write.table(data, file="preliminary_results.txt", col.names=names(data))
}
return(data)
}
get.index <- function(sequence, value) {
res <- logical(length(sequence))
for(i in 1:length(sequence)) {
res[i] <- isTRUE(all.equal(sequence[i], value))
}
return(which(res))
}
get.matrix <- function(data, value, sequence) {
if(!any(names(data)==value)) {
stop("'value' needs to be part of the data.frame")
}
v.index = which(names(data)==value)
sequence <- round(sequence, 3)
z <- matrix(nrow=length(sequence), ncol=length(sequence))
for(i in 1:nrow(data)) {
alpha <- round(data[i, ]$alpha, 3)
beta <- round(data[i, ]$beta, 3)
a.index <- get.index(sequence, alpha)
b.index <- get.index(sequence, beta)
if(isTRUE(all.equal(length(a.index), 0)) ||
isTRUE(all.equal(length(b.index), 0))){
print("problem: ")
print(alpha)
print(beta)
stop("sequence must contain alpha and beta values")
}
z[a.index, b.index] <- data[i, v.index]
z[b.index, a.index] <- data[i, v.index]
}
return(z)
}
data = harvest.data(TRUE)
mat1 = matrix(rexp(15*15, rate=.1), ncol=15, nrow=15)
conditions <- c("GeneralAccuracy", "WordAccuracy", "NonwordAccuracy")
for(c in conditions) {
image.plot(x=sort(aseq),
y=sort(aseq),
z=get.matrix(data, c, aseq),
#z = mat1, #alternative while results are not here yet
main=c,
xlab="alpha",
ylab="beta",
useRaster=TRUE,
col = topo.colors(100))
}
########################################################################################
#### MODEL FITTING & VISUALIZATION II
########################################################################################
### GeneralAccuracy
m1.g = gam(GeneralAccuracy ~ s(alpha) + s(beta), data=data)
summary(m1)
m2.g = gam(GeneralAccuracy ~ s(alpha) + s(beta) + ti(alpha, beta), data=data)
summary(m2.g)
compareML(m1.g, m2.g)
#m1 preferred
vis.gam(m1.g, plot.type="contour", color="topo", main="GeneralAccuracy")
### WordAccuracy
m1.w = gam(WordAccuracy ~ s(alpha) + s(beta), data=data)
summary(m1.w)
m2.w = gam(WordAccuracy ~ s(alpha) + s(beta) + ti(alpha, beta), data=data)
summary(m2.w)
compareML(m1.w, m2.w)
#m1 preferred
### NonwordAccuracy
m1.n = gam(NonwordAccuracy ~ s(alpha) + s(beta), data=data)
summary(m1.n)
m2.n = gam(NonwordAccuracy ~ s(alpha) + s(beta) + ti(alpha, beta), data=data)
summary(m2.n)
compareML(m1.n, m2.n)
#m1 preferred
vis.gam(m1.n, plot.type="contour", color="topo", main="NonwordAccuracy")
### NumWordsLearned
m1.l = gam(NumWordsLearned ~ s(alpha) + s(beta), data=data)
summary(m1.l)
m2.l = gam(NumWordsLearned ~ s(beta), data=data)
summary(m2.l)
compareML(m1.l, m2.l)
#m2 preferred
m3.l = gam(NumWordsLearned ~ te(alpha, beta), data=data)
summary(m3.l)
compareML(m2.l, m3.l)
#m2 preferred
vis.gam(m1.l, plot.type="contour", color="topo", main="NumWordsLearned")
#note: this plot shows a model which is not justified because m2 is preferred.
#it still is interesting, though.
### model criticism
qqnorm(m1.g$residuals)
qqnorm(m1.w$residuals)
qqnorm(m1.n$residuals)
# mostly ok
acf(resid(m1.g))
acf(resid(m1.w))
acf(resid(m1.n))
# ok
########################################################################################
#### OTHER ANALYSIS SNIPPETS
########################################################################################
#0. preprocessing
monkeydat$GeneralAccuracy = as.numeric(monkeydat$GeneralAccuracy)
monkeydat$WordAccuracy = as.numeric(monkeydat$WordAccuracy)
monkeydat$NonwordAccuracy = as.numeric(monkeydat$NonwordAccuracy)
monkeydat$NumTrials = as.numeric(monkeydat$NumTrials)
monkeydat$NumWordsLearned = as.numeric(monkeydat$NumWordsLearned)
monkeydat$NumNonwordsPresented = as.numeric(monkeydat$NumNonwordsPresented)
#1. chance level
chance.level <- r / 2
chance.level
max.accuracy <- 1-chance.level
max.accuracy
max(data$GeneralAccuracy)
max(data$WordAccuracy)
max(data$NonwordAccuracy)
#2. correlations
#interesting to look at (& to write in the paper):
cor(data$WordAccuracy, data$NonwordAccuracy)
cor(monkeydat$WordAccuracy, monkeydat$NonwordAccuracy)
#why is this correlation so different???
cor(data$GeneralAccuracy, data$NonwordAccuracy)
cor(monkeydat$GeneralAccuracy, monkeydat$NonwordAccuracy)
cor(data$GeneralAccuracy, data$WordAccuracy)
cor(monkeydat$GeneralAccuracy, monkeydat$NonwordAccuracy)
#3. linear model
#fitting a linear model:
m1 = lm(GeneralAccuracy ~ alpha, data=data)
summary(m1)
m2 = lm(GeneralAccuracy ~ alpha + beta, data=data)
summary(m2)
anova(m1, m2)
m3 = lm(GeneralAccuracy ~ alpha * beta, data=data)
summary(m3)
anova(m2, m3)
# EOF