Commit 4f71576b authored by root

removed encoding fix and added link for troubleshooting

parent c5b377e3
...@@ -51,3 +51,8 @@ Execute to sort the dataset by tree size. ...@@ -51,3 +51,8 @@ Execute to sort the dataset by tree size.
```bash ```bash
./tidy-up.sh ./tidy-up.sh
``` ```
## Troubleshooting
- Encoding
If you encounter an encoding error, follow the steps on this webpage: [https://www.thomas-krenn.com/de/wiki/Perl_warning_Setting_locale_failed_unter_Debian](https://www.thomas-krenn.com/de/wiki/Perl_warning_Setting_locale_failed_unter_Debian).
...@@ -32,7 +32,7 @@ dblp_data_tree = etree.parse('dblp-2017-11-01.xml', dblp_parser) ...@@ -32,7 +32,7 @@ dblp_data_tree = etree.parse('dblp-2017-11-01.xml', dblp_parser)
root = dblp_data_tree.getroot() root = dblp_data_tree.getroot()
# Output files. # Output files.
dblp_bracket = open('dblp.bracket', mode='w', encoding="utf8") dblp_bracket = open('dblp.bracket', 'w')
print("--- Processing each child of DBLP's root.") print("--- Processing each child of DBLP's root.")
......
import sys
import random import random
import sys
number = int(sys.argv[1]) number = int(sys.argv[1])
filename = sys.argv[2] filename = sys.argv[2]
with open(filename, encoding='utf-8') as f: with open(filename) as f:
lines = f.readlines() lines = f.readlines()
linestoprint = random.sample(range(len(lines)), number) linestoprint = random.sample(range(len(lines)), number)
for ln in linestoprint: for ln in linestoprint:
print(lines[ln].encode('utf8'), end='') print(lines[ln], end='')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment