Commit 4f71576b authored by root's avatar root
Browse files

removed encoding fix and added link for troubleshooting

parent c5b377e3
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -51,3 +51,8 @@ Execute to sort the dataset by tree size.
```bash
./tidy-up.sh
```

## Troubleshooting

- Encoding
In case of an encoding error, follow the steps on this webpage: [https://www.thomas-krenn.com/de/wiki/Perl_warning_Setting_locale_failed_unter_Debian](https://www.thomas-krenn.com/de/wiki/Perl_warning_Setting_locale_failed_unter_Debian).
+1 −1
Original line number Diff line number Diff line
@@ -32,7 +32,7 @@ dblp_data_tree = etree.parse('dblp-2017-11-01.xml', dblp_parser)
root = dblp_data_tree.getroot()

# Output files.
dblp_bracket = open('dblp.bracket', mode='w', encoding="utf8")
dblp_bracket = open('dblp.bracket', 'w')

print("--- Processing each child of DBLP's root.")

+3 −3
Original line number Diff line number Diff line
import sys
import random
import sys

number = int(sys.argv[1])
filename = sys.argv[2]

with open(filename, encoding='utf-8') as f:
with open(filename) as f:
    lines = f.readlines()

linestoprint = random.sample(range(len(lines)), number)

for ln in linestoprint:
    print(lines[ln].encode('utf8'), end='')
    print(lines[ln], end='')