Commit 5056dc25 authored by Mateusz Pawlik's avatar Mateusz Pawlik
Browse files

bolzano: Added a command that makes all labels equal, as an example for a worst-case experiment.

parent f9b4a2bc
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -29,5 +29,9 @@ iconv -f ISO-8859-1 -t "UTF-8" L.trees | tail -n +14 | sed 's/.*://' | awk '{p
# Build ../R_preprocessed.txt from R.trees; one pipeline stage per line:
iconv -f ISO-8859-1 -t "UTF-8" R.trees |  # convert file into UTF-8 format
  tail -n +14 |                           # remove header (output starts at line 14)
  sed 's/.*://' |                         # remove IDs (everything up to the last ':')
  awk '{print gsub("{","{"), $0}' |       # prefix each line with its number of "{" (equivalent to number of nodes)
  sort -n |                               # sort by that number
  cut -d' ' -f2- > ../R_preprocessed.txt  # drop the numeric prefix again and save to file

# prepare dataset with a single label: keep only the bracket structure of each
# tree and give every node the same dummy label 'o'
# NOTE: the braces are deliberately NOT escaped. Inside a bracket expression a
#       backslash is a literal character, so '[^\{\}]' would keep backslashes
#       from the input and '[\{]' would then rewrite each of them to '{o',
#       producing unbalanced brackets.
#   1st expression: remove non-bracket chars.
#   2nd expression: add single dummy label 'o' after every opening bracket
sed -e 's/[^{}]//g' -e 's/{/{o/g' ../L_preprocessed.txt > ../L_preprocessed_single_label.txt

# done with this dataset: change back to the parent directory
cd ..
 No newline at end of file