Newer
Older
linguistic_assignments / 07_final_assignment / baboonSimulation.R
library(testthat)
library(ndl)
library(compiler)
library(fields)
library(parallel)
library(doParallel)
library(foreach)
enableJIT(3)

monkeydat <- data.frame(Monkey=c("DAN", "ART", "CAU", "DOR", "VIO", "ARI"),
                       NumTrials=c(56689, 50985, 61142, 49608, 43041, 55407),
                       NumWordsLearned=c(308, 125, 112, 121, 81, 87),
                       NumNonwordsPresented=c(7832, 7832, 7832, 7832, 7832, 7832),
                       GeneralAccuracy=c(79.81, 73.41, 72.43, 73.15, 71.55, 71.14),
                       WordAccuracy=c(80.01, 74.83, 73.15, 79.26, 76.75, 75.38),
                       NonwordAccuracy=c(79.61, 72.00, 71.72, 67.06, 66.33, 66.90),
                       stringsAsFactors=F)

Resample <- function(x){
    x[sample.int(length(x))]
}

test_that("Resample", {
   x <- 1:10
   expect_that(length(Resample(x[x >  8])), equals(2))
   expect_that(length(Resample(x[x >  9])), equals(1))
   expect_that(length(Resample(x[x >  10])), equals(0))
})

RandomPick <- function(data, n) {
    if ( is.data.frame(data) ){
        data[sample(nrow(data), n, replace=T),]
    } else {
        data[sample(length(data), n, replace=T)]
    }
}

test_that("RandomPick", {
    n <- 10

    bigger <- 1:(3 * n)
    equal <- 1:n
    smaller <- c(1)
    empty <- c()
    df <- data.frame(a=c(1,2,3), b=c(4,5,6))

    expect_equal(length(RandomPick(bigger, n)), n)
    expect_equal(length(RandomPick(equal, n)), n)
    expect_equal(length(RandomPick(smaller, n)), n)
    expect_equal(length(RandomPick(empty, n)), 0)
    expect_equal(nrow(RandomPick(df, n)), n)
})


CreateBlock <- function(data){
    result <- data.frame(Stimulus=character(BLOCK_SIZE),
                         StimulusType=factor(character(BLOCK_SIZE),
                                             c("LearnedWord","NewWord","Nonword")),
                         stringsAsFactors=F)
    data <- data[,c("Stimulus","StimulusType")]
    stopifnot(is.character(data$Stimulus))

    newSample <- RandomPick(data[data$StimulusType == "NewWord",], 1)
    learnedSample <- RandomPick(data[data$StimulusType == "LearnedWord",], 25)
    if( nrow(newSample) > 0 ) {
        result[1:25,] <- newSample
    } else {
        result[1:25,] <- learnedSample
    }
    if( nrow(learnedSample) > 0 ){
        result[26:50,] <- learnedSample
    } else {
        result[26:50,] <- newSample
    }
   
    result[51:100,] <- RandomPick(data[data$StimulusType == "Nonword",], 50)

    result <- result[sample(nrow(result)),]
    rownames(result) <- 1:nrow(result)
    result
}

BLOCK_SIZE <- 100
test_that("CreateBlock", {
    stim <- as.character(1:300)
    data <- data.frame(Stimulus=stim, stringsAsFactors=F)
    data$StimulusType[1:100] <- "NewWord"
    data$StimulusType[101:200] <- "LearnedWord"
    data$StimulusType[201:300] <- "Nonword"

    block <- CreateBlock(data)
    trialNewWords <- block$Stimulus[ block$StimulusType == "NewWord" ]
    trialLearnedWords <- block$Stimulus[ block$StimulusType == "LearnedWord" ]
    trialNonwords <- block$Stimulus[ block$StimulusType == "Nonword" ]

    expect_that(nrow(block), equals(BLOCK_SIZE))
    expect_that(length(trialNewWords), equals(25))
    expect_that(length(unique(trialNewWords)), equals(1))
    expect_that(length(trialLearnedWords), equals(25))
    expect_that(length(trialNonwords), equals(50))
    # TODO: Decide what should be expected for empty vectors?
})


########## DELETE

st.stimulus = character(trialCount)
st.stimulustype = character(trialCount)
st.block = numeric(trialCount)
st.trialinblock   = numeric(trialCount)
st.trial = numeric(trialCount)
st.response = character(trialCount)

#View(block[curTrialNumInBlock, ])
#asdfasdf = block[curTrialNumInBlock, ]
#stop("try")
st.stimulus[curTrialNum] = block[curTrialNumInBlock, ][1]
lev = unclass(block[curTrialNumInBlock, ][2])
st.stimulustype[curTrialNum] = ifelse(lev==1, "LearnedWord", ifelse(lev==2, "NewWord", "Nonword"))
st.block[curTrialNum] = block[curTrialNumInBlock, ][3]
st.trialinblock[curTrialNum]   = block[curTrialNumInBlock, ][4]
st.trial[curTrialNum] = block[curTrialNumInBlock, ][5]
st.response[curTrialNum] = block[curTrialNumInBlock, ][6]

#trials[curTrialNum, ] <- block[curTrialNumInBlock, ]  


trialsResult = data.frame(cbind(st.stimulus, st.stimulustype, st.block,
                                st.trialinblock, st.trial,
                                st.response))


colnames(trialsResult) <- c("Stimulus", "StimulusType", "Block",
                            "TrialInBlock", "Trial", "Response")


#trials <- data.frame(Stimulus=character(trialCount),
#                     StimulusType=character(trialCount),
#                     Block=numeric(trialCount),
#                     TrialInBlock=numeric(trialCount),
#                     Trial=numeric(trialCount),
#                     Response=character(trialCount))


######


PresentTrials <- function(trialCount, present, data){
  
  trials = data.frame()
  
  for ( curTrialNum in 1:trialCount ) {
    curBlock <- ( (curTrialNum - 1) %/% BLOCK_SIZE) + 1
    curTrialNumInBlock <- ( (curTrialNum - 1) %% BLOCK_SIZE ) + 1
    isNewBlock <- curTrialNumInBlock == 1
    
    if ( isNewBlock ) {
      if ( curBlock > 1 ){
        newWordTrials <- block[block$StimulusType == "NewWord", ]
        if( length(unique(newWordTrials$Stimulus)) == 1 ){
          newWordTrials <- newWordTrials[1, ]
          wordResponses <- newWordTrials[newWordTrials$Response == "Word", ]
          wordResponseRate <- nrow(wordResponses) / nrow(newWordTrials)
          if ( wordResponseRate >= 0.8  ){
            data$StimulusType[data$Stimulus == newWordTrials$Stimulus] <- "LearnedWord"
          }
        }
      }
      block <- CreateBlock(data)
      block$Block <- curBlock
      block$TrialInBlock <- 1:nrow(block)
    }
    
    curTrial <- block[curTrialNumInBlock,]
    curStim <- curTrial$Stimulus
    curStimIsWord <- present(curStim, ifelse(curTrial$StimulusType == "Nonword", "Nonword", "Word"))
    
    block$Trial[curTrialNumInBlock] <- curTrialNum
    block$Response[curTrialNumInBlock] <- curStimIsWord
    
    trials <- rbind(trials, block[curTrialNumInBlock, ])
  }
  
  rownames(trials) <- 1:trialCount
  
  result <- list(Trials=trials, Stimuli=data)
  
  return(result)
}

test_that("PresentTrials", {
    stim <- as.character(1:300)
    data <- data.frame(Stimulus=stim, stringsAsFactors=F)
    data$StimulusType[1:100] <- "NewWord"
    data$StimulusType[101:200] <- "LearnedWord"
    data$StimulusType[201:300] <- "Nonword"
    count <- BLOCK_SIZE * 2.5

    result <- PresentTrials(count,
                            function(cue, outcome){
                                return(outcome)
                            },
                            data)
    trials <- result$Trials
    stimuli <- result$Stimuli
    expect_equal(nrow(trials), count)
    expect_equal(length(stimuli$StimulusType[stimuli$StimulusType == "LearnedWord"]), 102)
})

# The original colSums fails if matrix filters to 1 row and gets implicit castet to a vector
ColSums <- function(x) { 
    if(is.matrix(x)){
       return(colSums(x))
    } else {
        return(x)
    }
}

test_that("ColSums",{
    m2x2 <- matrix(1,2,2)
    colnames(m2x2) <- c("a","b")

    expect_equal(ColSums(m2x2), c(a=2,b=2))
    expect_equal(ColSums(m2x2[1,]), c(a=1,b=1))
})

MakeMonkey <- function(cueset, outcomeset=c("Word", "Nonword"),
                       alpha=sqrt(0.001), beta=sqrt(0.001), lambda=1.0){
    weights <- matrix(0, length(cueset), length(outcomeset))
    rownames(weights) <- cueset
    colnames(weights) <- outcomeset

    rate <- function(cue){
        cues <- unlist(strsplit(cue, "_"))
        cueWeights <- weights[cues, outcomeset]
        totalActivation <- ColSums(cueWeights)

        maxActivated <- names(totalActivation[totalActivation == max(totalActivation)])
        return(RandomPick(maxActivated,1))
    }
    
    learner <- function(stim, resp){
        cues <- unlist(strsplit(stim, "_"))

        cueWeights <- weights[cues, outcomeset]
        totalActivation <- ColSums(cueWeights)
       
        for (j in 1:length(cues)) {
            if (resp == "Nonword") {
                yesType <- "Nonword"
                noType <- "Word"
            } else if (resp == "Word") {
                yesType <- "Word"
                noType <- "Nonword"
            } else {
                stop("Unknown outcome", resp)
            }
            weights[cues[j],yesType] <<-  weights[cues[j],yesType] +
                alpha * beta * (lambda - totalActivation[yesType])
            weights[cues[j],noType] <<-  weights[cues[j],noType] +
                alpha * beta * (0 - totalActivation[noType])
        }
        return(rate(stim))
    }

    give_weights <- function(){
        return(weights)
    }

    list(
        GetWeights=give_weights,
        Learner=learner,
        Rate=rate
    )
}

test_that("MakeMonkey",{
    cueset <- c("a","b","c")

    monkey <- MakeMonkey(cueset)
    for (i in 1:1000){
        monkey$Learner("a_b","Word")
        monkey$Learner("b_c","Nonword")
    }
 
    print(monkey$GetWeights())
    expect_equal(monkey$Rate("a"), "Word")
    expect_equal(monkey$Rate("c"), "Nonword")
})

prepareDat <- function(){
    dat <- read.table("dataDan.txt", header=T, stringsAsFactors=F)
    dat$Cues <- orthoCoding(dat$String, grams=2)
    dat$Frequency <- 1
    dat$Stimulus <- dat$Cues
    dat$StimulusType <- factor(ifelse(dat$Type == "word", "NewWord", "Nonword"), c("Nonword","NewWord","LearnedWord"))
    dat$Outcomes <- factor(ifelse(dat$Type == "word", "Word", "Nonword"), c("Word","Nonword"))
    dat <- dat[sample(1:nrow(dat)),]
    cues <- unique(unlist(strsplit(dat$Cues, "_")))
    outcomes <- levels(dat$Outcomes)
    list(cues=cues,outcomes=outcomes,dat=dat)
}

runSim <- function(data, trialCount, alpha, beta, lambda) {
    monkey <- MakeMonkey(data$cues, data$outcomes, alpha, beta, lambda=1.0)
    trialNum <- round(trialCount)
    pres <- PresentTrials(
                    trialNum,
                    monkey$Learner,data$dat)
    return(pres)
}

analyseSim <- function(pres){
    learnedWordNum <- nrow(pres$Stimuli[pres$Stimuli$StimulusType == "LearnedWord",])
    trialNum <- nrow(pres$Trials)
    nonwordTrials <- pres$Trials[pres$Trials$StimulusType == "Nonword",]
    wordTrials <- pres$Trials[pres$Trials$StimulusType != "Nonword",]
    presNonwordNum <- length(unique(nonwordTrials$Stimulus))
    presWordNum <- length(unique(wordTrials$Stimulus))
    nonwordAcc <- nrow(nonwordTrials[nonwordTrials$Response == "Nonword",]) / nrow(nonwordTrials)
    wordAcc <- nrow(nonwordTrials[wordTrials$Response == "Word",]) / nrow(wordTrials)
    genAcc <- (nrow(nonwordTrials) * nonwordAcc + nrow(wordTrials) * wordAcc) / trialNum

    c("Sim",
      trialNum,
      learnedWordNum,
      presNonwordNum,
      genAcc,
      wordAcc,
      nonwordAcc)
}

simulateAndAnalyse <- function(data, trialCount, alpha=sqrt(0.001), beta=sqrt(0.001)){
    p <- runSim(data, trialCount, alpha, beta, lambda=1)
    analyseSim(p)
}

data <- prepareDat()

testfunc = function() {
  n <- 10
  trialCount <- 50000
  for( a in 0:n) {
    k <- 1.0 / n * a
    print(k)
    #s <- simulateAndAnalyse(data, trialCount, alpha=1, beta=k)
    s <- simulateAndAnalyse(data, trialCount, alpha=0.000001, beta=0.00001)
    s[1] <- as.character(k)
    monkeydat <- rbind(monkeydat,
                       s)
  }
  return(monkeydat)
}


testfunc()

first.result = c()
system.time(first.result <- testfunc())

first.result
#  user  system elapsed 
# 10.05    0.00   10.06 

#   user  system elapsed 
#  17.39    0.03   17.87 


###############################################################################################

x = matrix(nrow=2, ncol=0)

for(a in seq(from=0.01, to=0.20, by=0.025)) {
  for(b in seq(from=0.01, to=a, by=0.025)) {
    x = cbind(x, c(a, b))
  }
}

l = 12
st.1 = x[, 1:l]
st.2 = x[, (l+1):(2*l)]
st.3 = x[, (2*l+1):(3*l)]

get.a = function(i, index) {
  if(i==1) return(st.1[1, index])
  if(i==2) return(st.2[1, index])
  if(i==3) return(st.3[1, index])
}

get.b = function(i, index) {
  if(i==1) return(st.1[2, index])
  if(i==2) return(st.2[2, index])
  if(i==3) return(st.3[2, index])
}

do.experiment = function(i) {
  filename = paste("resultdat", i, ".txt", sep="")
  resultdat <- data.frame(alpha=numeric(1), beta=numeric(1), NumTrials=numeric(1), NumWordsLearned=numeric(1),
                          NumNonwordsPresented=numeric(1), GeneralAccuracy=numeric(1),
                          WordAccuracy=numeric(1), NonwordAccuracy=numeric(1))
  write.table(resultdat, file=filename, col.names=names(resultdat))
  
  trialCount <- 50000 
      
  for(index in 1:l) {
    a = get.a(i, index)
    b = get.b(i, index)
    resultdat = read.table(filename)
    
    print(paste("i: ", i, " a: ", a, " b: ", b, sep="" ))
    s <- simulateAndAnalyse(data, trialCount, alpha=a, beta=b)
    s[1] <- as.character(trialCount)
    resultdat <- rbind(resultdat,
                       c(a, b, s[2:length(s)]))
    
    write.table(resultdat, file=filename, col.names=names(resultdat))
    
  }
  
  resultdat = resultdat[-1, ] #remove 1st row which is just always 0 
  
  write.table(resultdat, file=filename, col.names=names(resultdat))
  
  return(resultdat)
}


num.cores = detectCores() - 1 #don't use all if you want to do anything else
cl <- makeCluster(num.cores)
registerDoParallel(cl)

foreach(i=1:num.cores) %dopar% do.experiment(i)



###########################
View(resultdat)

res.matrix = matrix(nrow=length(a.seq), ncol=length(b.seq))
counter = 1
counter.a = 1
for( a in a.seq) {
  counter.b = 1
  for (b in b.seq) {
    res.matrix[counter.a, counter.b] = as.numeric(resultdat[counter, ]$GeneralAccuracy)
    counter = counter + 1
    counter.b = counter.b + 1
  }
  counter.a = counter.a + 1
}

View(res.matrix)

# TODO: Durchlaufen lassen fuer ca 50k Trials fuer verschiedene alpha, beta zwischen 0 und 1
# TODO: Plotten der verschiedenen Ergebnisse (num,acc) als heatmap 2d plot.
# TODO: Plotten der Unterschiede zu den Vorgabeaffen und optimale alpha beta fuer diese finden.

#mat1 = matrix(rexp(200, rate=.1), ncol=50, nrow=50)
#mat2 = matrix(rexp(200, rate=.1), ncol=50, nrow=50)

image.plot(res.matrix, main="Word Accuracy", xlab="alpha", ylab="beta",
           useRaster=TRUE, col = topo.colors(100))


View(m1)

dat03 = data.frame()
for(i in 0:3) {
  d = read.table(paste("resultdat", i, ".txt", sep=""))
  d = d[-1, ]
  dat03 = rbind(dat03, d)
}
View(dat03)

write.table(dat03, file="preliminary_results.txt", col.names=names(dat03))



# EOF