#!/bin/bash
#
# wordify.sh - turn text into a stream of lower-case words, one per line.
#
# Usage: wordify.sh [FILE...]
#   With no FILE arguments, standard input is read instead.
#
# Output: every word of the input on its own line, with punctuation
# removed and letters converted to lower case.
#
# Uses GNU extensions: \xHH escapes in sed and long options in tr.

#######################################
# Copy the inputs to stdout, dropping a UTF-8 byte-order mark (EF BB BF)
# found at the very beginning of each one.
# Arguments: zero or more file paths; stdin is read when none are given.
# Outputs:   the input bytes, minus any leading BOM, on stdout.
# Returns:   exit status of the last sed invocation.
#######################################
strip_bom() {
  local -r bom_filter='1 s/^\xef\xbb\xbf//'
  local file

  if (( $# == 0 )); then
    sed -e "$bom_filter"
    return
  fi

  # One sed run per file, so that address "1" means the first line of
  # *each* file and not only the first line of the concatenated stream.
  for file in "$@"; do
    sed -e "$bom_filter" -- "$file"
  done
}

#######################################
# Split the given files (or stdin) into lower-case words, one per line.
# Arguments: zero or more file paths; stdin is read when none are given.
# Outputs:   one word per line on stdout.
# Returns:   exit status of the final pipeline stage.
#######################################
wordify() {
  # "$@" (quoted) keeps file names containing spaces or glob
  # characters intact.
  strip_bom "$@" |

    # remove punctuation
    tr --delete "[:punct:]" |

    # move words onto separate lines
    tr "[:space:]" "\n" |

    # remove empty lines; unlike squeezing repeated newlines, this also
    # drops an empty first line produced by leading whitespace
    sed '/^$/d' |

    # make lower case
    tr "[:upper:]" "[:lower:]"
}

wordify "$@"