#!/bin/bash
#########################################################
# Author: Marie D.
#
# (InputFile, OutputFile) -> tokens, one per line
#
# **Purpose:**
#   Given a text file, write its text to an output file,
#   tokenized and sentence split.
#
# **Usage:**
#   $ sh baseline.sh myfile.txt output.txt
#
# **Example of desired output:**
#   Hello
#   word
#   !
#
#   How
#   are
#   you
#   ?
#
#########################################################

# Print a short usage message on stderr.
usage() {
  printf 'Usage: %s INPUT_FILE OUTPUT_FILE\n' "${0##*/}" >&2
}

# Tokenize and sentence split stdin onto stdout.
#   1) Tokenisation: every space becomes a newline.
#   2) Sentence splitting: '.', '!' and '?' are put on a line of their own
#      and followed by an empty line.
# The bracket expression is written [.!?] on purpose: inside brackets a
# backslash is a literal character, so the former [\.\!\?] also split on '\'.
# NOTE: '\n' in the replacement text relies on GNU sed behaviour.
tokenize() {
  sed -E -e 's/ /\n/g' -e 's/[.!?]/\n&\n/g'
}

# Entry point.
# Arguments: $1 - path of the text file to read
#            $2 - path of the file to write the result to
# Outputs:   an informational message on stdout, diagnostics on stderr
# Returns:   0 on success, 2 on wrong usage, 1 if the input is unreadable,
#            otherwise the exit status of sed
main() {
  if [ "$#" -ne 2 ]; then
    usage
    return 2
  fi

  # Check the input first so a typo in its name does not create or
  # truncate the output file.
  if [ ! -r "$1" ]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$1" >&2
    return 1
  fi

  printf "This program will tokenize and sentence split '%s' and output '%s'.\n" \
    "$1" "$2"

  # Redirections (rather than a file operand) keep names that contain
  # spaces or start with '-' from being misinterpreted.
  tokenize < "$1" > "$2"
}

# 'return' is used inside main instead of 'exit', so the script's exit
# status is simply the status of this last command.
main "$@"