#!/usr/bin/env python

'''
    File name: statistics.py
    Author: Thomas Huetter
    Program: Compute statistics for a collection of trees given in bracket
        notation, one tree per line: the number of trees, the average,
        minimum and maximum tree size, and the number of different node
        labels (optionally the full label distribution). The result is
        written to stdout in JSON format.
'''

import sys
import argparse
import json
import re
from collections import Counter

# A run of one or more braces separates two node labels in bracket notation.
_BRACES = re.compile(r'[{}]+')


def str2bool(v):
    '''
    Convert a textual boolean (e.g. a command line value) to a bool.

    Accepts 'yes'/'true'/'t'/'y'/'1' and 'no'/'false'/'f'/'n'/'0',
    case-insensitively. Raises argparse.ArgumentTypeError for anything else.
    '''
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')


def parse_labels(line):
    '''
    Return the node labels of one tree given in bracket notation.

    The line is split at runs of '{' and '}'; the empty entries produced by
    the leading and the trailing brace are dropped. Because consecutive
    braces collapse into a single separator, nodes with an empty label
    (e.g. the two children in '{a{}{}}') do not show up in the result.
    '''
    return _BRACES.split(line.strip())[1:-1]


def compute_statistics(lines):
    '''
    Compute tree statistics over an iterable of bracket notation lines.

    Blank lines are skipped, so e.g. a trailing newline at the end of the
    input is not counted as a tree of size zero.

    Returns a tuple (stats, labels):
        stats  -- dict with the keys 'number_of_trees', 'avg_tree_size',
                  'min_tree_size', 'max_tree_size' and 'different_label'.
                  All values are 0 if the input contains no tree.
        labels -- dict mapping each node label to its number of occurrences,
                  in order of first appearance.
    '''
    labels = Counter()
    number_of_trees = 0
    sum_of_tree_sizes = 0
    # None (not 0) marks "no tree seen yet", so that a real size of 0 is
    # never mistaken for an unset minimum.
    min_tree_size = None
    max_tree_size = 0

    for line in lines:
        if not line.strip():
            continue

        nodes = parse_labels(line)
        tree_size = len(nodes)

        number_of_trees += 1
        sum_of_tree_sizes += tree_size
        if min_tree_size is None or tree_size < min_tree_size:
            min_tree_size = tree_size
        if tree_size > max_tree_size:
            max_tree_size = tree_size

        labels.update(nodes)

    # Guard against empty input instead of dividing by zero.
    if number_of_trees:
        avg_tree_size = sum_of_tree_sizes / number_of_trees
    else:
        avg_tree_size = 0.0

    stats = {
        "number_of_trees": number_of_trees,
        "avg_tree_size": avg_tree_size,
        "min_tree_size": 0 if min_tree_size is None else min_tree_size,
        "max_tree_size": max_tree_size,
        "different_label": len(labels),
    }
    return stats, dict(labels)


def format_statistics(stats, labels=None):
    '''
    Serialize the statistics as a JSON object string.

    If labels is not None, it is appended under the key 'labels'. The json
    module takes care of escaping, so labels containing quotes or
    backslashes still yield valid JSON.
    '''
    result = dict(stats)
    if labels is not None:
        result["labels"] = labels
    return json.dumps(result)


def main(argv=None):
    '''
    Parse the command line, read the input file and print the statistics.

    argv defaults to the arguments of the running process (sys.argv[1:]).
    '''
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--inputfile", type=str, required=True,
        help="path to input files containing line seperated trees in bracket notation")
    parser.add_argument("--printlabels", action='store_true', dest='printlabels',
        help="print label distribution")
    args = parser.parse_args(argv)

    # stream the input file line by line, one tree per line
    with open(args.inputfile) as f:
        stats, labels = compute_statistics(f)

    # print result to stdout
    print(format_statistics(stats, labels if args.printlabels else None))


if __name__ == "__main__":
    main()