Commit 38ad0aca authored by Thomas Huetter

Correct database attribute names in Python script and add seed to dblp random lines

parent 0b033515
......@@ -7,6 +7,7 @@ filename = sys.argv[2]
with open(filename) as f:
lines = f.readlines()
random.seed("12345678")
linestoprint = random.sample(range(len(lines)), number)
for ln in linestoprint:
......
......@@ -88,11 +88,11 @@ with open(args.inputfile) as f:
dataset_info = "{"
dataset_info += "\"filename\": \"" + str(filename) + "\""
dataset_info += ", \"short_description\": \"" + str(short_description) + "\""
dataset_info += ", \"number_of_trees\": " + str(number_of_trees)
dataset_info += ", \"number_trees\": " + str(number_of_trees)
dataset_info += ", \"avg_tree_size\": " + str(sum_of_tree_sizes/number_of_trees)
dataset_info += ", \"min_tree_size\": " + str(min_tree_size)
dataset_info += ", \"max_tree_size\": " + str(max_tree_size)
dataset_info += ", \"different_label\": " + str(len(labels))
dataset_info += ", \"number_labels\": " + str(len(labels))
dataset_info += "}"
# print result to stdout
......@@ -100,7 +100,7 @@ print(dataset_info)
# store dataset in db
if args.storeindb:
store_result(db_table_name, dataset_info)
store_result(db_table_name, json.loads(dataset_info))
# print label distribution
if args.printlabels:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment