#!/bin/bash
#
# Split text into one lower-cased word per line.
#
# Usage: <script> [FILE...]
#   Reads the named FILEs (or standard input when none are given, or when a
#   FILE is "-") and writes the resulting word list to standard output.
#
# Processing steps:
#   1. strip a UTF-8 byte-order mark from the first line of every input
#   2. delete punctuation characters
#   3. turn every run of whitespace into a single newline
#   4. drop empty lines
#   5. convert upper case to lower case
#
# Requires GNU sed and GNU tr (long options, `sed --separate`).

# Report a failure of any pipeline stage (e.g. an unreadable input file)
# instead of only the exit status of the final `tr`.
set -o pipefail

#######################################
# Emit the words of the given inputs, one lower-cased word per line.
# Arguments: zero or more file names; standard input is read when none are
#            given.
# Outputs:   the word list on stdout.
# Returns:   non-zero if any stage of the pipeline fails.
#######################################
normalize_words() {
  # --separate restarts line numbering for each input file, so address `1`
  # removes the BOM of every file rather than only that of the first one.
  sed --separate '1 s/^\xef\xbb\xbf//' -- "$@" \
    | tr --delete '[:punct:]' \
    | tr --squeeze-repeats '[:space:]' '\n' \
    | sed '/^$/d' \
    | tr '[:upper:]' '[:lower:]'
  # The `sed '/^$/d'` stage is needed because squeezing alone still leaves one
  # empty line when the input starts with whitespace.
}

normalize_words "$@"