Newer
Older
linguistic_assignments / 02_word_freq / src / wordify.sh
@David-Elias Kuenstle David-Elias Kuenstle on 3 Nov 2015 304 bytes Change scripts to use STDIN
#! /bin/bash
#
# wordify.sh - turn a text stream into a list of lower-case words.
#
# Usage:   wordify.sh < input.txt
#          cat a.txt b.txt | wordify.sh
#
# Reads text from standard input and writes one word per line to standard
# output. Punctuation is deleted (not replaced), so "don't" becomes "dont".

#######################################
# Split the text on stdin into lower-case words, one per line.
# Arguments: none
# Inputs:    text on stdin
# Outputs:   one word per line on stdout; never emits an empty line
# Returns:   exit status of the final pipeline stage
#######################################
wordify() {
  # Strip a UTF-8 byte order mark; it can only occur at the start of line 1.
  sed '1 s/^\xef\xbb\xbf//' |

    # Remove punctuation characters entirely.
    tr -d '[:punct:]' |

    # Put every word on its own line: each run of whitespace collapses
    # into a single newline.
    tr -s '[:space:]' '\n' |

    # Squeezing still leaves one empty line at the top when the input starts
    # with whitespace (or contains nothing but whitespace); drop it.
    sed '/^$/d' |

    # Fold everything to lower case.
    tr '[:upper:]' '[:lower:]'
}

wordify