linguistic_assignments/07_final_assignment/baboonSimulation.R at 481d3a806919ed1f452ba3116e7b256a16a516ca

Fork: 0
dek / linguistic_assignments
Find file
Newer
Older
linguistic_assignments / 07_final_assignment / baboonSimulation.R
David-Elias Kuenstle on 20 Mar 2016 16 KB Write results in a subdirectory
Raw Blame History
library(testthat)
library(ndl)
library(compiler)
library(fields)
library(parallel)
library(doParallel)
library(foreach)
enableJIT(3)


########################################################################################
####                           DATA OF ORIGINAL EXPERIMENT
########################################################################################

monkeydat <- data.frame(Monkey=c("DAN", "ART", "CAU", "DOR", "VIO", "ARI"),
                        NumTrials=c(56689, 50985, 61142, 49608, 43041, 55407),
                        NumWordsLearned=c(308, 125, 112, 121, 81, 87),
                        NumNonwordsPresented=c(7832, 7832, 7832, 7832, 7832, 7832),
                        GeneralAccuracy=c(79.81, 73.41, 72.43, 73.15, 71.55, 71.14),
                        WordAccuracy=c(80.01, 74.83, 73.15, 79.26, 76.75, 75.38),
                        NonwordAccuracy=c(79.61, 72.00, 71.72, 67.06, 66.33, 66.90),
                        stringsAsFactors=F)


########################################################################################
####                           DEFINING THE EXPERIMENTAL FUNCTIONS
########################################################################################

Resample <- function(x){
  x[sample.int(length(x))]
}

test_that("Resample", {
  x <- 1:10
  expect_that(length(Resample(x[x >  8])), equals(2))
  expect_that(length(Resample(x[x >  9])), equals(1))
  expect_that(length(Resample(x[x >  10])), equals(0))
})

RandomPick <- function(data, n) {
  if ( is.data.frame(data) ){
    data[sample(nrow(data), n, replace=TRUE),]
  } else {
    data[sample(length(data), n, replace=TRUE)]
  }
}

test_that("RandomPick", {
  n <- 10
  
  bigger <- 1:(3 * n)
  equal <- 1:n
  smaller <- c(1)
  empty <- c()
  df <- data.frame(a=c(1,2,3), b=c(4,5,6))
  
  expect_equal(length(RandomPick(bigger, n)), n)
  expect_equal(length(RandomPick(equal, n)), n)
  expect_equal(length(RandomPick(smaller, n)), n)
  expect_equal(length(RandomPick(empty, n)), 0)
  expect_equal(nrow(RandomPick(df, n)), n)
})


CreateBlock <- function(data){
  result <- data.frame(Stimulus=character(BLOCK_SIZE),
                       StimulusType=factor(character(BLOCK_SIZE),
                                           c("LearnedWord","NewWord","Nonword")),
                       stringsAsFactors=F)
  data <- data[,c("Stimulus","StimulusType")]
  stopifnot(is.character(data$Stimulus))
  
  newSample <- RandomPick(data[data$StimulusType == "NewWord",], 1)
  learnedSample <- RandomPick(data[data$StimulusType == "LearnedWord",], 25)
  if( nrow(newSample) > 0 ) {
    result[1:25,] <- newSample
  } else {
    result[1:25,] <- learnedSample
  }
  if( nrow(learnedSample) > 0 ){
    result[26:50,] <- learnedSample
  } else {
    result[26:50,] <- newSample
  }
  
  result[51:100,] <- RandomPick(data[data$StimulusType == "Nonword",], 50)
  
  result <- result[sample(nrow(result)),]
  rownames(result) <- 1:nrow(result)
  result
}

BLOCK_SIZE <- 100
test_that("CreateBlock", {
  stim <- as.character(1:300)
  data <- data.frame(Stimulus=stim, stringsAsFactors=F)
  data$StimulusType[1:100] <- "NewWord"
  data$StimulusType[101:200] <- "LearnedWord"
  data$StimulusType[201:300] <- "Nonword"
  
  block <- CreateBlock(data)
  trialNewWords <- block$Stimulus[ block$StimulusType == "NewWord" ]
  trialLearnedWords <- block$Stimulus[ block$StimulusType == "LearnedWord" ]
  trialNonwords <- block$Stimulus[ block$StimulusType == "Nonword" ]
  
  expect_that(nrow(block), equals(BLOCK_SIZE))
  expect_that(length(trialNewWords), equals(25))
  expect_that(length(unique(trialNewWords)), equals(1))
  expect_that(length(trialLearnedWords), equals(25))
  expect_that(length(trialNonwords), equals(50))
  # TODO: Decide what should be expected for empty vectors?
})


PresentTrials <- function(trialCount, present, data){
  
  trials = data.frame()
  
  for ( curTrialNum in 1:trialCount ) {
    curBlock <- ( (curTrialNum - 1) %/% BLOCK_SIZE) + 1
    curTrialNumInBlock <- ( (curTrialNum - 1) %% BLOCK_SIZE ) + 1
    isNewBlock <- curTrialNumInBlock == 1
    
    if ( isNewBlock ) {
      if ( curBlock > 1 ){
        newWordTrials <- block[block$StimulusType == "NewWord", ]
        if( length(unique(newWordTrials$Stimulus)) == 1 ){
          newWordTrials <- newWordTrials[1, ]
          wordResponses <- newWordTrials[newWordTrials$Response == "Word", ]
          wordResponseRate <- nrow(wordResponses) / nrow(newWordTrials)
          if ( wordResponseRate >= 0.8  ){
            data$StimulusType[data$Stimulus == newWordTrials$Stimulus] <- "LearnedWord"
          }
        }
      }
      block <- CreateBlock(data)
      block$Block <- curBlock
      block$TrialInBlock <- 1:nrow(block)
    }
    
    curTrial <- block[curTrialNumInBlock,]
    curStim <- curTrial$Stimulus
    curStimIsWord <- present(curStim, ifelse(curTrial$StimulusType == "Nonword", "Nonword", "Word"))
    
    block$Trial[curTrialNumInBlock] <- curTrialNum
    block$Response[curTrialNumInBlock] <- curStimIsWord
    
    trials <- rbind(trials, block[curTrialNumInBlock, ])
  }
  
  rownames(trials) <- 1:trialCount
  
  result <- list(Trials=trials, Stimuli=data)
  
  return(result)
}

test_that("PresentTrials", {
  stim <- as.character(1:300)
  data <- data.frame(Stimulus=stim, stringsAsFactors=F)
  data$StimulusType[1:100] <- "NewWord"
  data$StimulusType[101:200] <- "LearnedWord"
  data$StimulusType[201:300] <- "Nonword"
  count <- BLOCK_SIZE * 2.5
  
  result <- PresentTrials(count,
                          function(cue, outcome){
                            return(outcome)
                          },
                          data)
  trials <- result$Trials
  stimuli <- result$Stimuli
  expect_equal(nrow(trials), count)
  expect_equal(length(stimuli$StimulusType[stimuli$StimulusType == "LearnedWord"]), 102)
})

# The original colSums fails if matrix filters to 1 row and gets implicit casted to a vector
ColSums <- function(x) { 
  if(is.matrix(x)){
    return(colSums(x))
  } else {
    return(x)
  }
}

test_that("ColSums",{
  m2x2 <- matrix(1,2,2)
  colnames(m2x2) <- c("a","b")
  
  expect_equal(ColSums(m2x2), c(a=2,b=2))
  expect_equal(ColSums(m2x2[1,]), c(a=1,b=1))
})

MakeMonkey <- function(cueset, outcomeset=c("Word", "Nonword"),
                       alpha=sqrt(0.001), beta=sqrt(0.001), lambda=1.0, randomRatio=0.2){
  weights <- matrix(0, length(cueset), length(outcomeset))
  rownames(weights) <- cueset
  colnames(weights) <- outcomeset
  
  rate <- function(cue){
    cues <- unlist(strsplit(cue, "_"))
    cueWeights <- weights[cues, outcomeset]
    totalActivation <- ColSums(cueWeights)
    
    maxActivated <- names(totalActivation[totalActivation == max(totalActivation)])
    return(RandomPick(maxActivated,1))
  }
  randomLearner <- function(stim,resp){
      c("Word","Nonword")[sample(1:2,1)]
  }
  
  realLearner <- function(stim, resp){
    cues <- unlist(strsplit(stim, "_"))
    
    cueWeights <- weights[cues, outcomeset]
    totalActivation <- ColSums(cueWeights)
    
    for (j in 1:length(cues)) {
      if (resp == "Nonword") {
        yesType <- "Nonword"
        noType <- "Word"
      } else if (resp == "Word") {
        yesType <- "Word"
        noType <- "Nonword"
      } else {
        stop("Unknown outcome", resp)
      }
      weights[cues[j],yesType] <<-  weights[cues[j],yesType] +
        alpha * beta * (lambda - totalActivation[yesType])
      weights[cues[j],noType] <<-  weights[cues[j],noType] +
        alpha * beta * (0 - totalActivation[noType])
    }
    monkeyGuess <- rate(stim)
    return(monkeyGuess)
  }

  learner <- function(stim, resp) {
      if( runif(1) < randomRatio ){
          randomLearner(stim,resp)
      } else {
          realLearner(stim,resp)
      }
  }
  
  give_weights <- function(){
    return(weights)
  }
  
  return(list(
    GetWeights=give_weights,
    Learner=learner,
    Rate=rate))
}


test_that("MakeMonkey",{
  cueset <- c("a","b","c")
  
  monkey <- MakeMonkey(cueset)
  for (i in 1:1000){
    monkey$Learner("a_b","Word")
    monkey$Learner("b_c","Nonword")
  }
  
  print(monkey$GetWeights())
  expect_equal(monkey$Rate("a"), "Word")
  expect_equal(monkey$Rate("c"), "Nonword")
})

prepareDat <- function(){
  dat <- read.table("dataDan.txt", header=T, stringsAsFactors=F)
  dat$Cues <- orthoCoding(dat$String, grams=2)
  dat$Frequency <- 1
  dat$Stimulus <- dat$Cues
  dat$StimulusType <- factor(ifelse(dat$Type == "word", "NewWord", "Nonword"), c("Nonword","NewWord","LearnedWord"))
  dat$Outcomes <- factor(ifelse(dat$Type == "word", "Word", "Nonword"), c("Word","Nonword"))
  dat <- dat[sample(1:nrow(dat)),]
  cues <- unique(unlist(strsplit(dat$Cues, "_")))
  outcomes <- levels(dat$Outcomes)
  list(cues=cues,outcomes=outcomes,dat=dat)
}

runSim <- function(data, trialCount, alpha, beta, lambda=1, randomRatio) {
  monkey <- MakeMonkey(data$cues, data$outcomes, alpha, beta, lambda=lambda, randomRatio=randomRatio)
  trialNum <- round(trialCount)
  pres <- PresentTrials(
    trialNum,
    monkey$Learner, data$dat)
  return(pres)
}

analyseSim <- function(pres){
  learnedWordNum <- nrow(pres$Stimuli[pres$Stimuli$StimulusType == "LearnedWord",])
  trialNum <- nrow(pres$Trials)
  nonwordTrials <- pres$Trials[pres$Trials$StimulusType == "Nonword",]
  wordTrials <- pres$Trials[pres$Trials$StimulusType != "Nonword",]
  presNonwordNum <- length(unique(nonwordTrials$Stimulus))
  presWordNum <- length(unique(wordTrials$Stimulus))
  nonwordAcc <- nrow(nonwordTrials[nonwordTrials$Response == "Nonword",]) / nrow(nonwordTrials)
  wordAcc <- nrow(nonwordTrials[wordTrials$Response == "Word",]) / nrow(wordTrials)
  genAcc <- (nrow(nonwordTrials) * nonwordAcc + nrow(wordTrials) * wordAcc) / trialNum
  
  c("Sim",
    trialNum,
    learnedWordNum,
    presNonwordNum,
    genAcc,
    wordAcc,
    nonwordAcc)
}

simulateAndAnalyse <- function(data, trialCount, alpha=sqrt(0.001), beta=sqrt(0.001), randomRatio=0.2){
  p <- runSim(data, trialCount, alpha, beta, lambda=1, randomRatio)
  analyseSim(p)
}


########################################################################################
####                            IGNORE THIS BLOCK
########################################################################################
if (FALSE) {
  data <- prepareDat()
  
  testfunc = function() {
    n <- 10
    trialCount <- 50000
    for( a in 0:n) {
      k <- 1.0 / n * a
      print(k)
      #s <- simulateAndAnalyse(data, trialCount, alpha=1, beta=k)
      s <- simulateAndAnalyse(data, trialCount, alpha=0.000001, beta=0.00001)
      s[1] <- as.character(k)
      monkeydat <- rbind(monkeydat,
                         s)
    }
    return(monkeydat)
  }
  
  
}

########################################################################################
####                            RUNNING THE PARALLELIZED EXPERIMENTS
########################################################################################

# code for parallelizing the experiment.
# Goal: Being able to run without a shared data structure
# and thus avoiding conflicts with critical sections

print.iterations = FALSE #Note: Not meaningful for parallel execution,
#see http://blog.revolutionanalytics.com/2015/02/monitoring-progress-of-a-foreach-parallel-job.html
#for a discussion

num.cores = 15 #Can NOT be changed without further code adjustment
monkeys.per.core = 8 #Can NOT be changed without further code adjustment


start.of.a.and.b = 0.001 #Can be changed with no further code adjustment
end.of.a.and.b = 0.300 #Can be changed with no further code adjustment

debug = TRUE


#8 experiments for each core, 15 cores
# -> 120 Experiments possible.
#This is exactly the number of experiments
#necessary to run 15 x 15 conditions in
#a symmetrical setting (= little Gauss of 15)


a.and.b.combinations <- matrix(nrow=2, ncol=0)
aseq <- seq(from=start.of.a.and.b, to=end.of.a.and.b, length.out=15)
step <- aseq[2] - aseq[1]
for(a in aseq) {
  for(b in seq(from=start.of.a.and.b, to=a, by=step)) {
    a.and.b.combinations <- cbind(a.and.b.combinations, c(a, b))
  }
}

get.a <- function(i, index) {
  return((a.and.b.combinations[1, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index])
}

get.b <- function(i, index) {
  return((a.and.b.combinations[2, ((i*monkeys.per.core)-(monkeys.per.core-1)) : (i*monkeys.per.core)])[index])
}

do.experiment <- function(i) {
    resultdir <- "results"
    dir.create(resultdir, showWarnings = FALSE) # dont show warning if dir exists
  filename <- paste(resultdir, "/", "resultdat", i, ".txt", sep="")
  
  if(debug) {
    resultdat <- data.frame(alpha=numeric(1), beta=numeric(1),
                            Time=character(1),
                            NumWordsLearned=numeric(1),
                            NumNonwordsPresented=numeric(1),
                            GeneralAccuracy=numeric(1),
                            WordAccuracy=numeric(1),
                            NonwordAccuracy=numeric(1))
  } else {
    resultdat <- data.frame(alpha=numeric(1), beta=numeric(1),
                            NumTrials=numeric(1),
                            NumWordsLearned=numeric(1),
                            NumNonwordsPresented=numeric(1),
                            GeneralAccuracy=numeric(1),
                            WordAccuracy=numeric(1),
                            NonwordAccuracy=numeric(1))
  }
  write.table(resultdat, file=filename, col.names=names(resultdat))
  
  trialCount <- 50000 
  r <- 0.65
  for(index in 1:monkeys.per.core) {
    
    a = get.a(i, index)
    b = get.b(i, index)
    resultdat = read.table(filename)
    
    if(print.iterations) {
      print(paste("--running-- i: ", i, " a: ", a, " b: ", b, " r: ", r, sep="" ))
    }
    
    if(debug) {
      Sys.sleep(1)
      current.time = Sys.time()
      debug.frame = data.frame(alpha=a, beta=b,
                        Time=as.character(current.time),
                        NumWordsLearned=numeric(1),
                        NumNonwordsPresented=numeric(1),
                        GeneralAccuracy=numeric(1),
                        WordAccuracy=numeric(1),
                        NonwordAccuracy=numeric(1))
      
      resultdat <- rbind(resultdat, debug.frame)
      
    } else {
      
      s <- simulateAndAnalyse(data, trialCount, alpha=a, beta=b, randomRatio=r )
      s[1] <- as.character(trialCount)
      resultdat <- rbind(resultdat,
                         c(a, b, s[2:length(s)]))
      
    }
  
    #write preliminary result to file -> in case of interruption nothing gets lost
    write.table(resultdat, file=filename, col.names=names(resultdat))
  }
  
  resultdat <- resultdat[-1, ] #remove 1st row which is just always 0 
  write.table(resultdat, file=filename, col.names=names(resultdat))
  return(resultdat)
}


#...RUN, MONKEY, RUN!
cl <- makeCluster(num.cores)
registerDoParallel(cl)

foreach(i=1:num.cores) %dopar% do.experiment(i)

##new version:
#...RUN, MONKEY, RUN!
#registerDoParallel(cores=num.cores)
#foreach(i=1:num.cores) %dopar% do.experiment(i)


########################################################################################
####                            IGNORE THIS BLOCK -> used later for visualizing
########################################################################################

if(FALSE) {
  View(resultdat)
  
  res.matrix = matrix(nrow=length(a.seq), ncol=length(b.seq))
  counter = 1
  counter.a = 1
  for( a in a.seq) {
    counter.b = 1
    for (b in b.seq) {
      res.matrix[counter.a, counter.b] = as.numeric(resultdat[counter, ]$GeneralAccuracy)
      counter = counter + 1
      counter.b = counter.b + 1
    }
    counter.a = counter.a + 1
  }
  
  View(res.matrix)
  
  # TODO: Durchlaufen lassen fuer ca 50k Trials fuer verschiedene alpha, beta zwischen 0 und 1
  # TODO: Plotten der verschiedenen Ergebnisse (num,acc) als heatmap 2d plot.
  # TODO: Plotten der Unterschiede zu den Vorgabeaffen und optimale alpha beta fuer diese finden.
  
  #mat1 = matrix(rexp(200, rate=.1), ncol=50, nrow=50)
  #mat2 = matrix(rexp(200, rate=.1), ncol=50, nrow=50)
  
  image.plot(res.matrix, main="Word Accuracy", xlab="alpha", ylab="beta",
             useRaster=TRUE, col = topo.colors(100))
  
  
  View(m1)
  
  dat03 = data.frame()
  for(i in 0:3) {
    d = read.table(paste("resultdat", i, ".txt", sep=""))
    d = d[-1, ]
    dat03 = rbind(dat03, d)
  }
  View(dat03)
  
  write.table(dat03, file="preliminary_results.txt", col.names=names(dat03))
  
  
  
  prelimdat = read.table("preliminary_resultsV2.txt")
  View(prelimdat)
  newrow = cbind(0.000001, 0.00001, monkeydat[65, 2:7])
  names(newrow)[1]= "alpha"
  names(newrow)[2] = "beta"
  prelimdat = rbind(prelimdat, newrow)
  View(prelimdat)
  write.table(prelimdat, file="preliminary_resultsV2.txt", col.names=names(dat03))
}



# EOF