Commit 03845e51 authored by Thomas Huetter's avatar Thomas Huetter
Browse files

sentiment/download_prepare.sh: remove whitespaces before brackets

parent 38ad0aca
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -22,8 +22,8 @@ rm -rf trainDevTestTrees_PTB.zip
# NOTE(review): this cd is not checked here; if it fails, the pipelines below
# run in the current directory instead, unless errexit is enabled earlier in
# the script (not visible in this excerpt) -- TODO confirm.
cd trees

# prepare data for file L.trees
# NOTE(review): this excerpt is taken from a diff view, so two versions of the
# same pipeline appear back to back. The first comment/pipeline pair looks like
# the pre-change version and the second pair like its replacement. Both
# redirect with '>' to ../sentiment.bracket, so if both are executed as shown,
# the output of the second run replaces the output of the first.
#
# Version without whitespace removal. Pipeline stages, in order:
#   iconv   - re-encode dev.txt and train.txt from ISO-8859-1 to UTF-8
#             (both files are emitted as one concatenated stream)
#   sed     - replace every "(" with "{"
#   sed     - replace every ")" with "}"
#   awk     - prefix each line with its number of "{" characters
#             (gsub returns how many substitutions it made)
#   sort -n - order lines numerically by that count, i.e. by number of nodes
#             (equivalent to number of "{")
#   cut     - drop the leading count field again before writing the result
iconv -f ISO-8859-1 -t "UTF-8" dev.txt train.txt  | sed -e 's/(/{/g' | sed -e 's/)/}/g' | awk '{print gsub("{","{"), $0}' | sort -n  | cut -d' ' -f2- > ../sentiment.bracket
# Version with whitespace removal. Same stages as above, plus one extra
# sed -E stage after the bracket replacement:
#   sed -E  - delete the single whitespace character that directly precedes
#             each "{" (the "{" itself is kept via the \1 back-reference)
# The counting, sorting and count-stripping stages are unchanged.
iconv -f ISO-8859-1 -t "UTF-8" dev.txt train.txt  | sed -e 's/(/{/g' | sed -e 's/)/}/g' | sed -E 's/[[:space:]]([{])/\1/g' | awk '{print gsub("{","{"), $0}' | sort -n  | cut -d' ' -f2- > ../sentiment.bracket

# return to the parent directory
cd ..
 No newline at end of file