Commit 4f71576b authored by root
Browse files

removed encoding fix and added link for troubleshooting

parent c5b377e3
Loading
Loading
Loading
Loading
+5 −0
Original line number Original line Diff line number Diff line
@@ -51,3 +51,8 @@ Execute to sort the dataset by tree size.
```bash
```bash
./tidy-up.sh
./tidy-up.sh
```
```

## Troubleshooting

- Encoding:
  If you run into an encoding error, follow the steps on this webpage (in German) to fix the system locale settings: [https://www.thomas-krenn.com/de/wiki/Perl_warning_Setting_locale_failed_unter_Debian](https://www.thomas-krenn.com/de/wiki/Perl_warning_Setting_locale_failed_unter_Debian).
+1 −1
Original line number Original line Diff line number Diff line
@@ -32,7 +32,7 @@ dblp_data_tree = etree.parse('dblp-2017-11-01.xml', dblp_parser)
root = dblp_data_tree.getroot()
root = dblp_data_tree.getroot()


# Output files.
# Output files.
dblp_bracket = open('dblp.bracket', mode='w', encoding="utf8")
dblp_bracket = open('dblp.bracket', 'w')


print("--- Processing each child of DBLP's root.")
print("--- Processing each child of DBLP's root.")


+3 −3
Original line number Original line Diff line number Diff line
import sys
import random
import random
import sys


number = int(sys.argv[1])
number = int(sys.argv[1])
filename = sys.argv[2]
filename = sys.argv[2]


with open(filename, encoding='utf-8') as f:
with open(filename) as f:
    lines = f.readlines()
    lines = f.readlines()


linestoprint = random.sample(range(len(lines)), number)
linestoprint = random.sample(range(len(lines)), number)


for ln in linestoprint:
for ln in linestoprint:
    print(lines[ln].encode('utf8'), end='')
    print(lines[ln], end='')