Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mateusz Pawlik
ted-experiments
Commits
bb064f34
Commit
bb064f34
authored
Feb 28, 2020
by
Mateusz Pawlik
Browse files
Merge branch 'histograms' into develop
parents
78023e35
0037fefa
Changes
139
Hide whitespace changes
Inline
Side-by-side
.gitignore
0 → 100644
View file @
bb064f34
*.DS_Store
CMakeLists.txt
View file @
bb064f34
...
...
@@ -71,6 +71,8 @@ target_include_directories(
external/tree-similarity/src/parser
external/tree-similarity/src/ted
external/tree-similarity/src/ted_ub
external/tree-similarity/src/ted_lb
external/tree-similarity/src/join/guha
)
# Let the compiler know to find the header files in TreeSimilarity library.
...
...
README.md
View file @
bb064f34
...
...
@@ -7,7 +7,10 @@ Follow the instructions below to reproduce the environment and the experiments.
## ICDE 2019 Reproducibility
For reproducing the experiments of the ICDE 2019 submission, checkout the tag
This repository contains experiments of our ICDE 2019 paper
[
Effective Filters and Linear Time Verification for Tree Similarity Joins
](
http://eplus.uni-salzburg.at/obvusboa/download/pdf/4486886
)
.
To reproduce the experiments of the ICDE 2019 paper, check out the tag
`icde2019`
of this and
[
Tree Similarity library
](
https://github.com/DatabaseGroup/tree-similarity/tree/develop
)
repositories.
...
...
@@ -16,8 +19,15 @@ Obtain datasets from our
[
Datasets repository
](
https://frosch.cosy.sbg.ac.at/mpawlik/ted-datasets
)
.
Execute the experiments with all config files in
`configs/icde2019`
directory.
See execution details below. You may need to modify
`--dataset_path`
parameter
value when executing the experiments.
For the LGM Upper Bound and BSM verification experiments, certain views must be present
in the database. After executing all experiments, execute
`src/ted_algs/view_queries.sql`
on the database holding the experiment results.
Plot the results using
`src/plots/call_plot.sh`
file.
Plot the results using
`src/plots/create_all_plots.sh`
file from
`src/plots/`
directory.
## Build the project
...
...
configs/icde2019/bolzano.json
View file @
bb064f34
...
...
@@ -3,16 +3,30 @@
"bolzano/bolzano_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8.0
,
9.0
,
10.0
,
11.0
,
12.0
,
13.0
,
14.0
,
15.0
,
16.0
,
17.0
,
18.0
,
19.0
,
20.0
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8.0
,
9.0
,
10.0
,
11.0
,
12.0
,
13.0
,
14.0
,
15.0
,
16.0
,
17.0
,
18.0
,
19.0
,
20.0
],
"algorithms"
:
[
{
"name"
:
"t_join"
,
"verification_algorithm"
:
"Touzet"
,
"upperbound"
:
"greedy"
},
{
"name"
:
"histogram_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
},
{
"name"
:
"binary_branches_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"tang_join"
,
"verification_algorithm"
:
"
Touzet
"
,
"verification_algorithm"
:
"
APTED
"
,
"upperbound"
:
"none"
}
]
...
...
configs/icde2019/dblp.json
View file @
bb064f34
...
...
@@ -11,4 +11,4 @@
"upperbound"
:
"greedy"
}
]
}
\ No newline at end of file
}
configs/icde2019
_revision/swissprot
.json
→
configs/icde2019
/dblp_var_size
.json
View file @
bb064f34
{
"datasets"
:
[
"swissprot/swissprot_sorted.bracket"
"dblp/dblp_10000_sorted.bracket"
,
"dblp/dblp_50000_sorted.bracket"
,
"dblp/dblp_100000_sorted.bracket"
],
"thresholds"
:
[
1.0
,
5.0
,
10.0
,
15.0
,
20.0
,
25.0
,
30
.0
6
.0
],
"algorithms"
:
[
{
"name"
:
"t_join"
,
...
...
@@ -18,6 +20,11 @@
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"tang_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
...
...
configs/icde2019/python.json
View file @
bb064f34
...
...
@@ -11,4 +11,4 @@
"upperbound"
:
"greedy"
}
]
}
\ No newline at end of file
}
configs/icde2019/sentiment.json
View file @
bb064f34
...
...
@@ -3,16 +3,29 @@
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8.0
,
9.0
,
10.0
,
11.0
,
12.0
,
13.0
,
14.0
,
15.0
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8.0
,
9.0
,
10.0
,
11.0
,
12.0
,
13.0
,
14.0
,
15.0
],
"algorithms"
:
[
{
"name"
:
"t_join"
,
"verification_algorithm"
:
"Touzet"
,
"upperbound"
:
"greedy"
},
{
"name"
:
"histogram_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
},
{
"name"
:
"binary_branches_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"tang_join"
,
"verification_algorithm"
:
"
Touzet
"
,
"verification_algorithm"
:
"
APTED
"
,
"upperbound"
:
"none"
}
]
...
...
configs/icde2019
_histo
/sentiment.json
→
configs/icde2019
/sentiment_rsb_k
/sentiment
_t10
.json
View file @
bb064f34
...
...
@@ -3,33 +3,33 @@
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8.0
,
9.0
,
10.0
,
11.0
,
12.0
,
13.0
,
14.0
,
15.0
10.0
,
11.0
],
"algorithms"
:
[
{
"name"
:
"t_join"
,
"verification_algorithm"
:
"Touzet"
,
"upperbound"
:
"greedy"
},
{
"name"
:
"tang_join"
,
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"
leaf_distance_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"
label_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"
degree_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"
binary_branches
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"4"
}
]
}
\ No newline at end of file
configs/icde2019
_histo
/sentiment_
APTED
.json
→
configs/icde2019
/sentiment_rsb_k
/sentiment_
t12
.json
View file @
bb064f34
...
...
@@ -3,33 +3,33 @@
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8.0
,
9.0
,
10.0
,
11.0
,
12.0
,
13.0
,
14.0
,
15.0
12.0
,
13.0
],
"algorithms"
:
[
{
"name"
:
"t_join"
,
"verification_algorithm"
:
"Touzet"
,
"upperbound"
:
"greedy"
},
{
"name"
:
"tang_join"
,
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"
leaf_distance_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"
label_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"
degree_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"
binary_branches
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"5"
}
]
}
}
\ No newline at end of file
configs/icde2019
_histo/bolzano
.json
→
configs/icde2019
/sentiment_rsb_k/sentiment_t14
.json
View file @
bb064f34
{
"datasets"
:
[
"
bolzano/bolzano
_sorted.bracket"
"
sentiment/sentiment
_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8.0
,
9.0
,
10.0
,
11.0
,
12.0
,
13.0
,
14.0
,
15.0
,
16.0
,
17.0
,
18.0
,
19.0
,
20.0
14.0
,
15.0
],
"algorithms"
:
[
{
"name"
:
"t_join"
,
"verification_algorithm"
:
"Touzet"
,
"upperbound"
:
"greedy"
},
{
"name"
:
"tang_join"
,
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"
leaf_distance_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"
label_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"
degree_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"
binary_branches
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"6"
}
]
}
\ No newline at end of file
configs/icde2019/sentiment_rsb_k/sentiment_t2.json
0 → 100644
View file @
bb064f34
{
"datasets"
:
[
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"0"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"0"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"0"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"0"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"0"
}
]
}
\ No newline at end of file
configs/icde2019
_histo/dblp
.json
→
configs/icde2019
/sentiment_rsb_k/sentiment_t4
.json
View file @
bb064f34
{
"datasets"
:
[
"
dblp/dblp_10000
_sorted.bracket"
"
sentiment/sentiment
_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
,
4.0
,
5.0
,
6.0
,
7.0
,
8
.0
4.0
,
5
.0
],
"algorithms"
:
[
{
"name"
:
"t_join"
,
"verification_algorithm"
:
"Touzet"
,
"upperbound"
:
"greedy"
},
{
"name"
:
"tang_join"
,
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"1"
},
{
"name"
:
"
leaf_distance_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"1"
},
{
"name"
:
"
label_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"1"
},
{
"name"
:
"
degree_histogram
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"1"
},
{
"name"
:
"
binary_branches
_join"
,
{
"name"
:
"
guha_rsb
_join"
,
"verification_algorithm"
:
"APTED"
,
"upperbound"
:
"none"
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"1"
}
]
}
\ No newline at end of file
configs/icde2019/sentiment_rsb_k/sentiment_t6.json
0 → 100644
View file @
bb064f34
{
"datasets"
:
[
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
6.0
,
7.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"2"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"2"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"2"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"2"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"2"
}
]
}
\ No newline at end of file
configs/icde2019/sentiment_rsb_k/sentiment_t8.json
0 → 100644
View file @
bb064f34
{
"datasets"
:
[
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
8.0
,
9.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"3"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"3"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"3"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"3"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"3"
}
]
}
\ No newline at end of file
configs/icde2019/sentiment_rsc_k/sentiment_t10.json
0 → 100644
View file @
bb064f34
{
"datasets"
:
[
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
10.0
,
11.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"4"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"4"
}
]
}
\ No newline at end of file
configs/icde2019/sentiment_rsc_k/sentiment_t12.json
0 → 100644
View file @
bb064f34
{
"datasets"
:
[
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
12.0
,
13.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"5"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"5"
}
]
}
\ No newline at end of file
configs/icde2019/sentiment_rsc_k/sentiment_t14.json
0 → 100644
View file @
bb064f34
{
"datasets"
:
[
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
14.0
,
15.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"6"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"6"
}
]
}
\ No newline at end of file
configs/icde2019/sentiment_rsc_k/sentiment_t2.json
0 → 100644
View file @
bb064f34
{
"datasets"
:
[
"sentiment/sentiment_sorted.bracket"
],
"thresholds"
:
[
1.0
,
2.0
,
3.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"0"
},