#! /usr/bin/Rscript

# Plot word-frequency statistics for one or more word-list files.
#
# Usage: plotfreq.r FILE [FILE ...]
#
# Every FILE is read with readLines(), so each input line is counted as one
# word. Two curves are drawn side by side on the default pdf() device
# (R's default output file name is used, since no file argument is given):
#   1. log(word frequency) against log(rank of the word)
#   2. log(number of words sharing a frequency) against log(rank)
# A short preview of each intermediate result is printed to stdout.

args <- commandArgs(trailingOnly = TRUE)

# Fail early with a clear message instead of an obscure plot() error later.
if (length(args) == 0) {
  stop("usage: plotfreq.r FILE [FILE ...]", call. = FALSE)
}

# Read every file and flatten the per-file line vectors into one word vector.
lwords <- lapply(args, readLines)
words <- unlist(lwords)
if (length(words) == 0) {
  stop("no words found in the input file(s)", call. = FALSE)
}
head(words)

# Log of each word's occurrence count, most frequent word first.
logWordFreq <- sort(log(table(words)), decreasing = TRUE)
# seq_along() rather than 1:length(): the latter yields c(1, 0) on empty input.
logWordFreqRank <- log(seq_along(logWordFreq))
head(logWordFreq)

# Log of how many words share each (log) frequency, most common first.
logFreqFreq <- sort(log(table(logWordFreq)), decreasing = TRUE)
logFreqFreqRank <- log(seq_along(logFreqFreq))
head(logFreqFreq)

pdf()
# Two panels in one row.
par(mfcol = c(1, 2))
plot(logWordFreqRank,
     logWordFreq,
     type = "l",
     xlab = "log rank",
     ylab = "log word frequency")
plot(logFreqFreqRank,
     logFreqFreq,
     type = "l",
     xlab = "log rank",
     ylab = "log frequency of word frequency")
dev.off()