Newer
Older
linguistic_assignments / 02_word_freq / src / wordify.sh
@David-Elias Kuenstle David-Elias Kuenstle on 2 Nov 2015 305 bytes Add script to create word list from text
#! /bin/bash
#
# wordify.sh - turn text into a list of lower-case words, one word per line.
#
# Usage: wordify.sh [FILE...]
#   Reads the given files, or standard input when no file is given, and
#   writes the resulting word list to standard output.

#######################################
# Split the input text into lower-case words, one per line.
# Arguments: $@ - input files; with no arguments standard input is read
# Outputs:   the word list on stdout
# Returns:   exit status of the last pipeline stage
#######################################
wordify() {
  # Remove the UTF-8 BOM from the first line of every input file.
  # --separate makes the address "1" apply to each file instead of only to
  # the start of the concatenated stream; "$@" keeps file names containing
  # spaces or glob characters intact; LC_ALL=C makes sed match raw bytes.
  LC_ALL=C sed --separate '1 s/^\xef\xbb\xbf//' -- "$@" |

    # remove punctuation
    tr --delete "[:punct:]" |

    # move words onto separate lines
    tr "[:space:]" "\n" |

    # remove empty lines (including one at the very start of the stream,
    # which merely squeezing repeated newlines would leave behind)
    sed '/^$/d' |

    # make lower case
    tr "[:upper:]" "[:lower:]"
}

wordify "$@"