Commit 37c55f48 authored by Thomas Huetter's avatar Thomas Huetter
Browse files

started to rewrite the plotting script

parent 2ed80fca
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "bolzano_sorted.bracket" "dataset_filename": ["bolzano_sorted.bracket"]
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "dblp_no_www_sorted.bracket" "dataset_filename": ["dblp_no_www_sorted.bracket"]
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "dblp_10000_sorted.bracket" "dataset_filename": ["dblp_10000_sorted.bracket"]
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "python_sorted.bracket" "dataset_filename": ["python_sorted.bracket"]
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "python_10000_sorted.bracket" "dataset_filename": ["python_10000_sorted.bracket"]
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,13 +71,12 @@ ...@@ -71,13 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "sentiment_sorted.bracket", "dataset_filename": ["sentiment_sorted.bracket"]
"verification_algorithm": "Touzet"
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "swissprot_sorted.bracket" "dataset_filename": ["swissprot_sorted.bracket"]
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
], ],
"name": "$\\mathtt{TJoin}$", "name": "$\\mathtt{TJoin}$",
"constraints": { "constraints": {
"upperbound": "greedy" "upperbound": ["greedy"]
} }
}, },
{ {
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
], ],
"name": "$\\mathtt{Guha\\ K=2}$", "name": "$\\mathtt{Guha\\ K=2}$",
"constraints": { "constraints": {
"reference_set_size": 2 "reference_set_size": [2]
} }
}, },
{ {
...@@ -71,12 +71,12 @@ ...@@ -71,12 +71,12 @@
], ],
"name": "$\\mathtt{Guha\\ K=4}$", "name": "$\\mathtt{Guha\\ K=4}$",
"constraints": { "constraints": {
"reference_set_size": 4 "reference_set_size": [4]
} }
} }
], ],
"constraints": { "constraints": {
"dataset_filename": "swissprot_10000_sorted.bracket" "dataset_filename": ["swissprot_10000_sorted.bracket"]
}, },
"x_axis": { "x_axis": {
"db_column": "threshold", "db_column": "threshold",
......
{
"title": "Collection Size",
"print_title": "no",
"legend_font_size": 18,
"legend_frame_alpha": 0.8,
"legend": "upper right",
"grid": "on",
"dataset_name": "DBLP",
"markers": ["s", "*", "o", "^", "p", "d", "P"],
"markersize": [15, 20, 15, 15, 15, 15, 15],
"markerfacecolor": "none",
"colors": ["red", "green", "blue", "cyan", "magenta", "orange", "greenyellow"],
"tables": [
{
"table_name": "tang_join",
"attributes": [
{
"attr_name": "avg(tree_to_binary_tree_time + index_time + verification_time)/1000"
}
],
"name": "$\\mathtt{Tang}$",
"constraints": {
"verification_algorithm": ["APTED"],
"dataset_filename": ["dblp_10000_sorted.bracket", "dblp_50000_sorted.bracket", "dblp_100000_sorted.bracket"]
}
},
{
"table_name": "t_join",
"attributes": [
{
"attr_name": "avg(tree_to_set_time + index_time + upperbound_time + verification_time)/1000"
}
],
"name": "$\\mathtt{TJoin}$",
"constraints": {
"verification_algorithm": ["Touzet"],
"upperbound": ["greedy"],
"dataset_filename": ["dblp_10000_sorted.bracket", "dblp_50000_sorted.bracket", "dblp_100000_sorted.bracket"]
}
},
{
"table_name": "binary_branches_join",
"attributes": [
{
"attr_name": "avg(tree_to_set_time + index_time + upperbound_time + verification_time)/1000"
}
],
"name": "$\\mathtt{BinBranches}$",
"constraints": {
"verification_algorithm": ["APTED"],
"dataset_filename": ["dblp_10000_sorted.bracket", "dblp_50000_sorted.bracket", "dblp_100000_sorted.bracket"]
}
},
{
"table_name": "histogram_join",
"attributes": [
{
"attr_name": "avg(tree_to_set_time + index_time + upperbound_time + verification_time)/1000"
}
],
"name": "$\\mathtt{Histogram}$",
"constraints": {
"verification_algorithm": ["APTED"],
"dataset_filename": ["dblp_10000_sorted.bracket", "dblp_50000_sorted.bracket", "dblp_100000_sorted.bracket"]
}
},
{
"table_name": "guha_join",
"attributes": [
{
"attr_name": "avg(vectors_time + candidates_time + verification_time)/1000"
}
],
"name": "$\\mathtt{Guha\\ K=2}$",
"constraints": {
"verification_algorithm": ["APTED"],
"reference_set_size": [2],
"dataset_filename": ["dblp_10000_sorted.bracket", "dblp_50000_sorted.bracket", "dblp_100000_sorted.bracket"]
}
}
],
"constraints": {
"threshold": [1]
},
"x_axis": {
"db_column": "dataset_filename",
"size_plot": 1,
"name": "Dataset Size",
"font_size": 20,
"ticks_font_size": 20
},
"y_axis": {
"name": "Time [s]",
"scale": "log",
"font_size": 20,
"ticks_font_size": 20
}
}
\ No newline at end of file
#!/usr/bin/env python3
# The MIT License (MIT)
# Copyright (c) 2017 Thomas Huetter.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
'''
Program: Outputs a plot specified in a given config file.
The data is requested from the ted-join database.
'''
import argparse
import json
import os
import numpy as np
import matplotlib.pyplot as plt
import psycopg2
from psycopg2 import sql
from matplotlib.ticker import EngFormatter
def db_request(service, select_args, table_args, where_args, group_args, order_args):
    """Run a single SELECT against the database and return all result rows.

    The statement is assembled as
    ``SELECT <select_args> FROM <table_args><where_args><group_args><order_args>``.
    No separator is inserted after the table name, so ``where_args``,
    ``group_args`` and ``order_args`` must each carry their own leading
    whitespace and keyword (or be empty strings).

    Args:
        service: Connection service name; passed to psycopg2 as
            ``"service=<service>"``.
        select_args: Text placed between ``SELECT`` and ``FROM``.
        table_args: Text placed directly after ``FROM``.
        where_args: Text appended after the table name.
        group_args: Text appended after ``where_args``.
        order_args: Text appended after ``group_args``.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the executed query.

    NOTE(review): the query is built by plain string concatenation, so every
    argument must come from a trusted source (here presumably the plot config
    file). Do not feed untrusted input into this function.
    """
    # Connect to database.
    db = psycopg2.connect("service=" + service)
    try:
        # Open a cursor to perform database operations.
        cur = db.cursor()
        try:
            # Build query.
            query = (
                "SELECT " + select_args
                + " FROM " + table_args
                + where_args + group_args + order_args
            )
            cur.execute(query)
            # Fetch query result.
            rows = cur.fetchall()
            db.commit()
        finally:
            # Close the cursor even if the query failed.
            cur.close()
    finally:
        # Close communication with the database even if the query failed,
        # so a bad query does not leak the connection.
        db.close()
    return rows
# Parse input arguments.
parser = argparse.ArgumentParser()
# --config is required: the script unconditionally opens args.config below,
# so omitting it would otherwise fail later with an unhelpful TypeError.
parser.add_argument("--config", type=str, required=True,
                    help="path to input config file (json)")
parser.add_argument("--storeplot", type=str, default="",
                    help="path and filename where the plot should be stored")
parser.add_argument('--service', type=str, required=True,
                    help="Service name for db connection. Specify this parameter to store the results in a database.")
args = parser.parse_args()
# Split the config location into directory and file name.
path, filename = os.path.split(args.config)
# Load the JSON plot configuration.
with open(args.config) as f:
    data = json.load(f)
fig, ax = plt.subplots()
# Set title ("<title> - <dataset_name>") unless the config disables it.
if data['print_title'] != 'no' and 'title' in data:
    # Axes.title is a Text attribute, not a method; set_title() is the
    # call that actually sets the title text.
    ax.set_title(data['title'] + " - " + data['dataset_name'])
# Set grid.
# NOTE(review): the config value is forwarded as-is (e.g. the string "on");
# how matplotlib interprets string values may depend on its version - confirm.
if 'grid' in data:
    ax.grid(data['grid'])
# State shared with the plotting code that follows.
marker_cnt = 0
y = []
result = {}
plot_parameters = {}
# execute the query
for table in data['tables']:
for attr in table['attributes']:
if 'size_plot' in data['x_axis']:
if 'aggregate' in data['y_axis']:
select_args = "split_part(" + data['x_axis']['db_column'] + ", '_', 2), " + data['y_axis']['aggregate'] + "(" + attr['attr_name'] + ")"
else:
select_args = "split_part(" + data['x_axis']['db_column'] + ", '_', 2), " + attr['attr_name']
else:
if 'aggregate' in data['y_axis']:
select_args = data['x_axis']['db_column'] + ", " + data['y_axis']['aggregate'] + "(" + attr['attr_name'] + ")"
else:
select_args = data['x_axis']['db_column'] + ", " + attr['attr_name']
if 'factor' in attr:
select_args += attr['factor']
table_args = table['table_name']
where_args = ''
if 'constraints' in data: