From c4240f8745a6b98323bc8822f5d9bf8367e2cb52 Mon Sep 17 00:00:00 2001 From: thuetter Date: Wed, 2 May 2018 14:28:38 +0200 Subject: [PATCH] dblp/README.md: alternative dblp_no_www command --- dblp/README.md | 5 +++++ xmark/xmark_to_bracket.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dblp/README.md b/dblp/README.md index 2af3ecd..72c6969 100644 --- a/dblp/README.md +++ b/dblp/README.md @@ -51,6 +51,11 @@ Execute to remove the homepage entries. ```bash sed '/{www{key{/d' dblp_sorted.bracket > dblp_no_www_sorted.bracket ``` +or +```bash +awk '!/{www{key{/' dblp_sorted.bracket > dblp_no_www_sorted.bracket +``` + **(Optional)** Execute to delete all downloaded files. It leaves only the output dataset files. ```bash diff --git a/xmark/xmark_to_bracket.py b/xmark/xmark_to_bracket.py index f466a59..a800cbb 100644 --- a/xmark/xmark_to_bracket.py +++ b/xmark/xmark_to_bracket.py @@ -46,7 +46,7 @@ tree_id = 0 for child in root: tree_id += 1 # Printing simple progress. - if tree_id % 10000 == 0: + if tree_id % 1000 == 0: print("- Tree %s" % (tree_id)) handler = XMarkContentHandler() lxml.sax.saxify(child, handler) -- GitLab