Commit a900d0f7 authored by Thomas Huetter

Added histogram-based algorithms to join experiments

parent 199f52b7
......@@ -61,6 +61,10 @@ target_include_directories(
external/tree-similarity/src/join/tjoin
external/tree-similarity/src/join/tang
external/tree-similarity/src/join/naive
external/tree-similarity/src/join/binary_branches
external/tree-similarity/src/join/label_histogram
external/tree-similarity/src/join/degree_histogram
external/tree-similarity/src/join/leaf_dist_histogram
external/tree-similarity/src/label
external/tree-similarity/src/node
external/tree-similarity/src/parser
......
{
"datasets": [
"bolzano/bolzano_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "leaf_distance_histogram_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "label_histogram_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "degree_histogram_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "binary_branches_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
}
]
}
\ No newline at end of file
{
"datasets": [
"dblp/dblp_no_www_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
}
]
}
\ No newline at end of file
{
"datasets": [
"python/python_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 5.0, 10.0, 15.0, 20.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
11.0, 12.0, 13.0, 14.0, 15.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "leaf_distance_histogram_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "label_histogram_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "degree_histogram_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
},
{ "name": "binary_branches_join",
"verification_algorithm" : "Touzet",
"upperbound": "none"
}
]
}
\ No newline at end of file
{
"datasets": [
"swissprot/swissprot_sorted.bracket"
],
"thresholds": [
1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0,
35.0, 40.0, 45.0, 50.0, 55.0, 60.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
}
]
}
\ No newline at end of file
{
"datasets": [
"dblp/dblp_no_www_sorted.bracket"
],
"thresholds": [
10.0
],
"algorithms": [
"--apted", "--tzd", "--lg"
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
5.0,10.0,15.0,20.0
],
"algorithms": [
"--apted", "--tzd", "--lg"
]
}
\ No newline at end of file
......@@ -110,6 +110,118 @@ CREATE TABLE tang_join (
upperbound_pruned bigint -- Number of pairs in the result set by upperbound computation.
);
-- Measurements recorded for the binary_branches_join algorithm, keyed by
-- execution_id (presumably one row per experiment run -- confirm against the
-- experiment runner). An existing table of this name is dropped first, so its
-- rows are discarded.
DROP TABLE IF EXISTS binary_branches_join;
CREATE TABLE binary_branches_join (
    -- ---- Common attributes ----
    execution_id                  serial,
    experiments_version           varchar(127),
    experiments_timestamp         timestamp,
    hostname                      varchar(127),
    dataset_filename              varchar(127),
    dataset_parsing_time          bigint,
    algorithm_version             varchar(127),
    threshold                     decimal,
    sum_subproblems               bigint,
    join_result_size              bigint,
    sum_subproblem_optimum        bigint,
    optimum_time                  bigint,

    -- ---- Algorithm-specific attributes ----
    verification_algorithm        varchar(31),
    tree_to_set_time              bigint,
    -- Pairs of trees that the index looks at.
    inv_list_lookups              bigint,
    -- Pairs of trees produced by the index alone.
    index_verification_candidates bigint,
    -- Total time of the index together with its verification step.
    index_time                    bigint,
    -- Pairs of trees remaining after the index's verification step.
    verification_candidates       bigint,
    -- Time spent on TED verification.
    verification_time             bigint,
    -- String naming the upper bound that is used.
    upperbound                    varchar(127),
    -- Time spent computing upper bounds for the verification candidates.
    upperbound_time               bigint,
    -- Number of pairs in the result set by upper bound computation.
    upperbound_pruned             bigint,

    -- Keys, declared at table level.
    PRIMARY KEY (execution_id),
    FOREIGN KEY (dataset_filename) REFERENCES dataset (filename)
);
-- Measurements recorded for the label_histogram_join algorithm, keyed by
-- execution_id (presumably one row per experiment run -- confirm against the
-- experiment runner). An existing table of this name is dropped first, so its
-- rows are discarded.
DROP TABLE IF EXISTS label_histogram_join;
CREATE TABLE label_histogram_join (
    -- ---- Common attributes ----
    execution_id                  serial,
    experiments_version           varchar(127),
    experiments_timestamp         timestamp,
    hostname                      varchar(127),
    dataset_filename              varchar(127),
    dataset_parsing_time          bigint,
    algorithm_version             varchar(127),
    threshold                     decimal,
    sum_subproblems               bigint,
    join_result_size              bigint,
    sum_subproblem_optimum        bigint,
    optimum_time                  bigint,

    -- ---- Algorithm-specific attributes ----
    verification_algorithm        varchar(31),
    tree_to_set_time              bigint,
    -- Pairs of trees that the index looks at.
    inv_list_lookups              bigint,
    -- Pairs of trees produced by the index alone.
    index_verification_candidates bigint,
    -- Total time of the index together with its verification step.
    index_time                    bigint,
    -- Pairs of trees remaining after the index's verification step.
    verification_candidates       bigint,
    -- Time spent on TED verification.
    verification_time             bigint,
    -- String naming the upper bound that is used.
    upperbound                    varchar(127),
    -- Time spent computing upper bounds for the verification candidates.
    upperbound_time               bigint,
    -- Number of pairs in the result set by upper bound computation.
    upperbound_pruned             bigint,

    -- Keys, declared at table level.
    PRIMARY KEY (execution_id),
    FOREIGN KEY (dataset_filename) REFERENCES dataset (filename)
);
-- Measurements recorded for the leaf_distance_histogram_join algorithm, keyed
-- by execution_id (presumably one row per experiment run -- confirm against
-- the experiment runner). An existing table of this name is dropped first, so
-- its rows are discarded.
DROP TABLE IF EXISTS leaf_distance_histogram_join;
CREATE TABLE leaf_distance_histogram_join (
    -- ---- Common attributes ----
    execution_id                  serial,
    experiments_version           varchar(127),
    experiments_timestamp         timestamp,
    hostname                      varchar(127),
    dataset_filename              varchar(127),
    dataset_parsing_time          bigint,
    algorithm_version             varchar(127),
    threshold                     decimal,
    sum_subproblems               bigint,
    join_result_size              bigint,
    sum_subproblem_optimum        bigint,
    optimum_time                  bigint,

    -- ---- Algorithm-specific attributes ----
    verification_algorithm        varchar(31),
    tree_to_set_time              bigint,
    -- Pairs of trees that the index looks at.
    inv_list_lookups              bigint,
    -- Pairs of trees produced by the index alone.
    index_verification_candidates bigint,
    -- Total time of the index together with its verification step.
    index_time                    bigint,
    -- Pairs of trees remaining after the index's verification step.
    verification_candidates       bigint,
    -- Time spent on TED verification.
    verification_time             bigint,
    -- String naming the upper bound that is used.
    upperbound                    varchar(127),
    -- Time spent computing upper bounds for the verification candidates.
    upperbound_time               bigint,
    -- Number of pairs in the result set by upper bound computation.
    upperbound_pruned             bigint,

    -- Keys, declared at table level.
    PRIMARY KEY (execution_id),
    FOREIGN KEY (dataset_filename) REFERENCES dataset (filename)
);
-- Measurements recorded for the degree_histogram_join algorithm, keyed by
-- execution_id (presumably one row per experiment run -- confirm against the
-- experiment runner). An existing table of this name is dropped first, so its
-- rows are discarded.
DROP TABLE IF EXISTS degree_histogram_join;
CREATE TABLE degree_histogram_join (
    -- ---- Common attributes ----
    execution_id                  serial,
    experiments_version           varchar(127),
    experiments_timestamp         timestamp,
    hostname                      varchar(127),
    dataset_filename              varchar(127),
    dataset_parsing_time          bigint,
    algorithm_version             varchar(127),
    threshold                     decimal,
    sum_subproblems               bigint,
    join_result_size              bigint,
    sum_subproblem_optimum        bigint,
    optimum_time                  bigint,

    -- ---- Algorithm-specific attributes ----
    verification_algorithm        varchar(31),
    tree_to_set_time              bigint,
    -- Pairs of trees that the index looks at.
    inv_list_lookups              bigint,
    -- Pairs of trees produced by the index alone.
    index_verification_candidates bigint,
    -- Total time of the index together with its verification step.
    index_time                    bigint,
    -- Pairs of trees remaining after the index's verification step.
    verification_candidates       bigint,
    -- Time spent on TED verification.
    verification_time             bigint,
    -- String naming the upper bound that is used.
    upperbound                    varchar(127),
    -- Time spent computing upper bounds for the verification candidates.
    upperbound_time               bigint,
    -- Number of pairs in the result set by upper bound computation.
    upperbound_pruned             bigint,

    -- Keys, declared at table level.
    PRIMARY KEY (execution_id),
    FOREIGN KEY (dataset_filename) REFERENCES dataset (filename)
);
-- Parameters of a ted experiment (for normalization):
-- ted_experiment_timestamp timestamp,
......
This diff is collapsed.
......@@ -40,6 +40,10 @@
#include "t_join.h"
#include "tang_join.h"
#include "naive_join.h"
#include "lh_join.h"
#include "dh_join.h"
#include "ldh_join.h"
#include "bb_join.h"
#include "unit_cost_model.h"
#include "label_set_converter.h"
#include "binary_tree_converter.h"
......
......@@ -128,7 +128,7 @@ def main():
# build command that needs to be executed
cmd = []
# call binary
if a['name'] == 't_join' or a['name'] == 'tang_join':
if a['name'] == 't_join' or a['name'] == 'tang_join' or a['name'] == 'leaf_distance_histogram_join' or a['name'] == 'label_histogram_join' or a['name'] == 'degree_histogram_join' or a['name'] == 'binary_branches_join':
algorithm_params = {
"verification_algorithm" : a['verification_algorithm'],
"upperbound" : a['upperbound']
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment