Commit 4e0ee533 authored by Thomas Huetter's avatar Thomas Huetter
Browse files added new statistics implementation

parent 4f71576b
#!/usr/bin/env python
File name:
Author: Thomas Huetter
Program: Wrapper script to call ../build/ted-join-experiments. Instead
of writing to the database, the output is written to stdout in
json format. Called from /src/.
import sys
import argparse
import json
import re
def str2bool(v):
    """Convert a textual truth value into a ``bool`` (argparse ``type=`` helper).

    Args:
        v: Either an actual ``bool`` (returned unchanged) or a string.
            Strings are compared case-insensitively against
            'yes', 'true', 't', 'y', '1' (-> True) and
            'no', 'false', 'f', 'n', '0' (-> False).

    Returns:
        The boolean the input stands for.

    Raises:
        argparse.ArgumentTypeError: If the string matches neither set.
    """
    # A real bool (e.g. a programmatic default) has no .lower(); pass it through.
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
def compute_statistics(lines, include_labels=False):
    """Compute size and label statistics over trees in bracket notation.

    Args:
        lines: Iterable of strings, one tree per string (e.g. an open file).
        include_labels: When true, the per-label counters are added to the
            result under the key "labels".

    Returns:
        A dict with the keys "number_of_trees", "avg_tree_size",
        "min_tree_size", "max_tree_size", "different_label" and, optionally,
        "labels". For empty input the average is 0.0 and the minimum is 0.
    """
    labels = {}  # dict that holds a counter for each node label
    number_of_trees = 0
    sum_of_tree_sizes = 0
    # None (not 0) marks "no tree seen yet", so a tree of size 0 is not
    # mistaken for the unset state and overwritten by a later, larger tree.
    min_tree_size = None
    max_tree_size = 0

    # for each tree in the input
    for line in lines:
        # split line by '{' and '}' and remove the entries at the beginning
        # and end of the split result
        nodes = re.split(r'[{}]+', line.strip())[1:-1]
        tree_size = len(nodes)

        number_of_trees += 1
        # sum to compute average
        sum_of_tree_sizes += tree_size
        if min_tree_size is None or tree_size < min_tree_size:
            min_tree_size = tree_size
        if tree_size > max_tree_size:
            max_tree_size = tree_size

        # count each node label
        for label in nodes:
            labels[label] = labels.get(label, 0) + 1

    # Guard the division: an empty input has no trees to average over.
    if number_of_trees:
        avg_tree_size = sum_of_tree_sizes / number_of_trees
    else:
        avg_tree_size = 0.0

    stats = {
        "number_of_trees": number_of_trees,
        "avg_tree_size": avg_tree_size,
        "min_tree_size": 0 if min_tree_size is None else min_tree_size,
        "max_tree_size": max_tree_size,
        "different_label": len(labels),
    }
    if include_labels:
        stats["labels"] = labels
    return stats


def main(argv=None):
    """Parse the command line, read the input file and print the statistics.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    The result is written to stdout as one complete JSON object.
    """
    # parse input arguments
    parser = argparse.ArgumentParser()
    # required: without a path there is nothing to open, so fail with a
    # usage message instead of a TypeError from open(None).
    parser.add_argument("--inputfile", type=str, required=True,
                        help="path to input files containing line seperated trees in bracket notation")
    parser.add_argument("--printlabels", action='store_true', dest='printlabels',
                        help="print label distribution")
    args = parser.parse_args(argv)

    # open inputfile
    with open(args.inputfile) as f:
        stats = compute_statistics(f, include_labels=args.printlabels)

    # print result to stdout; json.dumps closes the object and escapes
    # quotes/backslashes inside labels, which hand-built output did not.
    print(json.dumps(stats))


if __name__ == "__main__":
    main()
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment