Commit bb064f34 authored by Mateusz Pawlik

Merge branch 'histograms' into develop

parents 78023e35 0037fefa
*.DS_Store
......@@ -71,6 +71,8 @@ target_include_directories(
external/tree-similarity/src/parser
external/tree-similarity/src/ted
external/tree-similarity/src/ted_ub
external/tree-similarity/src/ted_lb
external/tree-similarity/src/join/guha
)
# Let the compiler know to find the header files in TreeSimilarity library.
......
......@@ -7,7 +7,10 @@ Follow the instructions below to reproduce the environment and the experiments.
## ICDE 2019 Reproducibility
For reproducing the experiments of the ICDE 2019 submission, checkout the tag
This repository contains experiments of our ICDE 2019 paper
[Effective Filters and Linear Time Verification for Tree Similarity Joins](http://eplus.uni-salzburg.at/obvusboa/download/pdf/4486886).
To reproduce the experiments of the ICDE 2019 paper, check out the tag
`icde2019` of this and
[Tree Similarity library](https://github.com/DatabaseGroup/tree-similarity/tree/develop)
repositories.
......@@ -16,8 +19,15 @@ Obtain datasets from our
[Datasets repository](https://frosch.cosy.sbg.ac.at/mpawlik/ted-datasets).
Execute the experiments with all config files in `configs/icde2019` directory.
See execution details below. You may need to modify the `--dataset_path` parameter
value when executing the experiments.
For the LGM Upper Bound and BSM verification experiment, certain views must be present
in the database. After executing all experiments, execute `src/ted_algs/view_queries.sql`
on the database holding the experiment results.
Plot the results using `src/plots/call_plot.sh` file.
Plot the results using the `src/plots/create_all_plots.sh` file from the `src/plots/`
directory.
## Build the project
......
......@@ -3,16 +3,30 @@
"bolzano/bolzano_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
16.0, 17.0, 18.0, 19.0, 20.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "histogram_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "binary_branches_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "-1"
},
{ "name": "tang_join",
"verification_algorithm" : "Touzet",
"verification_algorithm" : "APTED",
"upperbound": "none"
}
]
......
......@@ -11,4 +11,4 @@
"upperbound": "greedy"
}
]
}
\ No newline at end of file
}
{
"datasets": [
"swissprot/swissprot_sorted.bracket"
"dblp/dblp_10000_sorted.bracket",
"dblp/dblp_50000_sorted.bracket",
"dblp/dblp_100000_sorted.bracket"
],
"thresholds": [
1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0
6.0
],
"algorithms": [
{ "name": "t_join",
......@@ -18,6 +20,11 @@
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "-1"
},
{ "name": "tang_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
......
......@@ -11,4 +11,4 @@
"upperbound": "greedy"
}
]
}
\ No newline at end of file
}
......@@ -3,16 +3,29 @@
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
11.0, 12.0, 13.0, 14.0, 15.0
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "histogram_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "binary_branches_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "-1"
},
{ "name": "tang_join",
"verification_algorithm" : "Touzet",
"verification_algorithm" : "APTED",
"upperbound": "none"
}
]
......
......@@ -3,33 +3,33 @@
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
11.0, 12.0, 13.0, 14.0, 15.0
10.0, 11.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "2",
"reference_set_id": "4"
},
{ "name": "leaf_distance_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "4",
"reference_set_id": "4"
},
{ "name": "label_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "8",
"reference_set_id": "4"
},
{ "name": "degree_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "16",
"reference_set_id": "4"
},
{ "name": "binary_branches_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "32",
"reference_set_id": "4"
}
]
}
\ No newline at end of file
......@@ -3,33 +3,33 @@
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
11.0, 12.0, 13.0, 14.0, 15.0
12.0, 13.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "2",
"reference_set_id": "5"
},
{ "name": "leaf_distance_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "4",
"reference_set_id": "5"
},
{ "name": "label_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "8",
"reference_set_id": "5"
},
{ "name": "degree_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "16",
"reference_set_id": "5"
},
{ "name": "binary_branches_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "32",
"reference_set_id": "5"
}
]
}
}
\ No newline at end of file
{
"datasets": [
"bolzano/bolzano_sorted.bracket"
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0
14.0, 15.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "2",
"reference_set_id": "6"
},
{ "name": "leaf_distance_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "4",
"reference_set_id": "6"
},
{ "name": "label_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "8",
"reference_set_id": "6"
},
{ "name": "degree_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "16",
"reference_set_id": "6"
},
{ "name": "binary_branches_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "32",
"reference_set_id": "6"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0
],
"algorithms": [
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "0"
}
]
}
\ No newline at end of file
{
"datasets": [
"dblp/dblp_10000_sorted.bracket"
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
4.0, 5.0
],
"algorithms": [
{ "name": "t_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "2",
"reference_set_id": "1"
},
{ "name": "leaf_distance_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "4",
"reference_set_id": "1"
},
{ "name": "label_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "8",
"reference_set_id": "1"
},
{ "name": "degree_histogram_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "16",
"reference_set_id": "1"
},
{ "name": "binary_branches_join",
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
"reference_set_size": "32",
"reference_set_id": "1"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
6.0, 7.0
],
"algorithms": [
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "2"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
8.0, 9.0
],
"algorithms": [
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "3"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
10.0, 11.0
],
"algorithms": [
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "4"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
12.0, 13.0
],
"algorithms": [
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "5"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "5"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "5"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "5"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "5"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
14.0, 15.0
],
"algorithms": [
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "6"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "6"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "6"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "6"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "6"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0
],
"algorithms": [
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "0"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "0"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "0"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",