diff --git a/02_word_freq/src/plotfreq.r b/02_word_freq/src/plotfreq.r index 723d903..d5a76fc 100755 --- a/02_word_freq/src/plotfreq.r +++ b/02_word_freq/src/plotfreq.r @@ -1,9 +1,16 @@ #! /usr/bin/Rscript args <- commandArgs(trailingOnly = TRUE) +if (length(args) > 0){ + outfile = args[1] +} else { + outfile = "freqPlot.pdf" +} -lwords <- lapply(args, readLines) -words <- unlist(lwords) +f <- file("stdin") +open(f) +words <- readLines(f) +close(f) head(words) logWordFreq <- sort(log(table(words)),decreasing = TRUE) @@ -14,7 +21,9 @@ logFreqFreqRank <- log(1:length(logFreqFreq)) head(logFreqFreq) -pdf() +print(paste("Save plot to", outfile)) +a4width <- 8.3 +pdf(outfile, width=a4width, height=a4width/2) par(mfcol=c(1,2)) plot(logWordFreqRank, logWordFreq, @@ -26,4 +35,3 @@ type='l', xlab="log rank", ylab="log frequency of word frequency") -dev.off() diff --git a/02_word_freq/src/wordify.sh b/02_word_freq/src/wordify.sh index 34b3937..6b852ea 100755 --- a/02_word_freq/src/wordify.sh +++ b/02_word_freq/src/wordify.sh @@ -1,7 +1,7 @@ #! /bin/bash # concatenate all input files -cat $* | +cat - | # remove BOM from UTF-8 sed '1 s/^\xef\xbb\xbf//' |