Loading dblp/random_lines.py +1 −0 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ filename = sys.argv[2] with open(filename) as f: lines = f.readlines() random.seed("12345678") linestoprint = random.sample(range(len(lines)), number) for ln in linestoprint: Loading statistics/statistics.py +3 −3 Original line number Diff line number Diff line Loading @@ -88,11 +88,11 @@ with open(args.inputfile) as f: dataset_info = "{" dataset_info += "\"filename\": \"" + str(filename) + "\"" dataset_info += ", \"short_description\": \"" + str(short_description) + "\"" dataset_info += ", \"number_of_trees\": " + str(number_of_trees) dataset_info += ", \"number_trees\": " + str(number_of_trees) dataset_info += ", \"avg_tree_size\": " + str(sum_of_tree_sizes/number_of_trees) dataset_info += ", \"min_tree_size\": " + str(min_tree_size) dataset_info += ", \"max_tree_size\": " + str(max_tree_size) dataset_info += ", \"different_label\": " + str(len(labels)) dataset_info += ", \"number_labels\": " + str(len(labels)) dataset_info += "}" # print result to stdout Loading @@ -100,7 +100,7 @@ print(dataset_info) # store dataset in db if args.storeindb: store_result(db_table_name, dataset_info) store_result(db_table_name, json.loads(dataset_info)) # print label distribution if args.printlabels: Loading Loading
dblp/random_lines.py +1 −0 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ filename = sys.argv[2] with open(filename) as f: lines = f.readlines() random.seed("12345678") linestoprint = random.sample(range(len(lines)), number) for ln in linestoprint: Loading
statistics/statistics.py +3 −3 Original line number Diff line number Diff line Loading @@ -88,11 +88,11 @@ with open(args.inputfile) as f: dataset_info = "{" dataset_info += "\"filename\": \"" + str(filename) + "\"" dataset_info += ", \"short_description\": \"" + str(short_description) + "\"" dataset_info += ", \"number_of_trees\": " + str(number_of_trees) dataset_info += ", \"number_trees\": " + str(number_of_trees) dataset_info += ", \"avg_tree_size\": " + str(sum_of_tree_sizes/number_of_trees) dataset_info += ", \"min_tree_size\": " + str(min_tree_size) dataset_info += ", \"max_tree_size\": " + str(max_tree_size) dataset_info += ", \"different_label\": " + str(len(labels)) dataset_info += ", \"number_labels\": " + str(len(labels)) dataset_info += "}" # print result to stdout Loading @@ -100,7 +100,7 @@ print(dataset_info) # store dataset in db if args.storeindb: store_result(db_table_name, dataset_info) store_result(db_table_name, json.loads(dataset_info)) # print label distribution if args.printlabels: Loading