Commit bb064f34 authored by Mateusz Pawlik's avatar Mateusz Pawlik
Browse files

Merge branch 'histograms' into develop

parents 78023e35 0037fefa
*.DS_Store
...@@ -71,6 +71,8 @@ target_include_directories( ...@@ -71,6 +71,8 @@ target_include_directories(
external/tree-similarity/src/parser external/tree-similarity/src/parser
external/tree-similarity/src/ted external/tree-similarity/src/ted
external/tree-similarity/src/ted_ub external/tree-similarity/src/ted_ub
external/tree-similarity/src/ted_lb
external/tree-similarity/src/join/guha
) )
# Let the compiler know to find the header files in TreeSimilarity library. # Let the compiler know to find the header files in TreeSimilarity library.
......
...@@ -7,7 +7,10 @@ Follow the instructions below to reproduce the environment and the experiments. ...@@ -7,7 +7,10 @@ Follow the instructions below to reproduce the environment and the experiments.
## ICDE 2019 Reproducibility ## ICDE 2019 Reproducibility
For reproducing the experiments of the ICDE 2019 submission, checkout the tag This repository contains experiments of our ICDE 2019 paper
[Effective Filters and Linear Time Verification for Tree Similarity Joins](http://eplus.uni-salzburg.at/obvusboa/download/pdf/4486886).
To reproduce the experiments of the ICDE 2019 paper, check out the tag
`icde2019` of this and `icde2019` of this and
[Tree Similarity library](https://github.com/DatabaseGroup/tree-similarity/tree/develop) [Tree Similarity library](https://github.com/DatabaseGroup/tree-similarity/tree/develop)
repositories. repositories.
...@@ -16,8 +19,15 @@ Obtain datasets from our ...@@ -16,8 +19,15 @@ Obtain datasets from our
[Datasets repository](https://frosch.cosy.sbg.ac.at/mpawlik/ted-datasets). [Datasets repository](https://frosch.cosy.sbg.ac.at/mpawlik/ted-datasets).
Execute the experiments with all config files in `configs/icde2019` directory. Execute the experiments with all config files in `configs/icde2019` directory.
See execution details below. You may need to modify the `--dataset_path` parameter
value when executing the experiments.
For the LGM Upper Bound and BSM verification experiments, certain views must be present
in the database. After executing all experiments, execute `src/ted_algs/view_queries.sql`
on the database holding the experiment results.
Plot the results using `src/plots/call_plot.sh` file. Plot the results using `src/plots/create_all_plots.sh` file from `src/plots/`
directory.
## Build the project ## Build the project
......
...@@ -3,16 +3,30 @@ ...@@ -3,16 +3,30 @@
"bolzano/bolzano_sorted.bracket" "bolzano/bolzano_sorted.bracket"
], ],
"thresholds": [ "thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
16.0, 17.0, 18.0, 19.0, 20.0
], ],
"algorithms": [ "algorithms": [
{ "name": "t_join", { "name": "t_join",
"verification_algorithm" : "Touzet", "verification_algorithm" : "Touzet",
"upperbound": "greedy" "upperbound": "greedy"
}, },
{ "name": "histogram_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "binary_branches_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "-1"
},
{ "name": "tang_join", { "name": "tang_join",
"verification_algorithm" : "Touzet", "verification_algorithm" : "APTED",
"upperbound": "none" "upperbound": "none"
} }
] ]
......
{ {
"datasets": [ "datasets": [
"swissprot/swissprot_sorted.bracket" "dblp/dblp_10000_sorted.bracket",
"dblp/dblp_50000_sorted.bracket",
"dblp/dblp_100000_sorted.bracket"
], ],
"thresholds": [ "thresholds": [
1.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0 6.0
], ],
"algorithms": [ "algorithms": [
{ "name": "t_join", { "name": "t_join",
...@@ -18,6 +20,11 @@ ...@@ -18,6 +20,11 @@
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "upperbound": "none"
}, },
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "-1"
},
{ "name": "tang_join", { "name": "tang_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "upperbound": "none"
......
...@@ -3,16 +3,29 @@ ...@@ -3,16 +3,29 @@
"sentiment/sentiment_sorted.bracket" "sentiment/sentiment_sorted.bracket"
], ],
"thresholds": [ "thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
11.0, 12.0, 13.0, 14.0, 15.0 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0
], ],
"algorithms": [ "algorithms": [
{ "name": "t_join", { "name": "t_join",
"verification_algorithm" : "Touzet", "verification_algorithm" : "Touzet",
"upperbound": "greedy" "upperbound": "greedy"
}, },
{ "name": "histogram_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "binary_branches_join",
"verification_algorithm" : "APTED",
"upperbound": "none"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "-1"
},
{ "name": "tang_join", { "name": "tang_join",
"verification_algorithm" : "Touzet", "verification_algorithm" : "APTED",
"upperbound": "none" "upperbound": "none"
} }
] ]
......
...@@ -3,33 +3,33 @@ ...@@ -3,33 +3,33 @@
"sentiment/sentiment_sorted.bracket" "sentiment/sentiment_sorted.bracket"
], ],
"thresholds": [ "thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 10.0, 11.0
11.0, 12.0, 13.0, 14.0, 15.0
], ],
"algorithms": [ "algorithms": [
{ "name": "t_join", { "name": "guha_rsb_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "2",
"reference_set_id": "4"
}, },
{ "name": "leaf_distance_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "4",
"reference_set_id": "4"
}, },
{ "name": "label_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "8",
"reference_set_id": "4"
}, },
{ "name": "degree_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "16",
"reference_set_id": "4"
}, },
{ "name": "binary_branches_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "32",
"reference_set_id": "4"
} }
] ]
} }
\ No newline at end of file
...@@ -3,33 +3,33 @@ ...@@ -3,33 +3,33 @@
"sentiment/sentiment_sorted.bracket" "sentiment/sentiment_sorted.bracket"
], ],
"thresholds": [ "thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 13.0
11.0, 12.0, 13.0, 14.0, 15.0
], ],
"algorithms": [ "algorithms": [
{ "name": "t_join", { "name": "guha_rsb_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "2",
"reference_set_id": "5"
}, },
{ "name": "leaf_distance_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "4",
"reference_set_id": "5"
}, },
{ "name": "label_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "8",
"reference_set_id": "5"
}, },
{ "name": "degree_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "16",
"reference_set_id": "5"
}, },
{ "name": "binary_branches_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "32",
"reference_set_id": "5"
} }
] ]
} }
\ No newline at end of file
{ {
"datasets": [ "datasets": [
"bolzano/bolzano_sorted.bracket" "sentiment/sentiment_sorted.bracket"
], ],
"thresholds": [ "thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 14.0, 15.0
11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0
], ],
"algorithms": [ "algorithms": [
{ "name": "t_join", { "name": "guha_rsb_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "2",
"reference_set_id": "6"
}, },
{ "name": "leaf_distance_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "4",
"reference_set_id": "6"
}, },
{ "name": "label_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "8",
"reference_set_id": "6"
}, },
{ "name": "degree_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "16",
"reference_set_id": "6"
}, },
{ "name": "binary_branches_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "32",
"reference_set_id": "6"
} }
] ]
} }
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
1.0, 2.0, 3.0
],
"algorithms": [
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "0"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "0"
}
]
}
\ No newline at end of file
{ {
"datasets": [ "datasets": [
"dblp/dblp_10000_sorted.bracket" "sentiment/sentiment_sorted.bracket"
], ],
"thresholds": [ "thresholds": [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 4.0, 5.0
], ],
"algorithms": [ "algorithms": [
{ "name": "t_join", { "name": "guha_rsb_join",
"verification_algorithm" : "Touzet",
"upperbound": "greedy"
},
{ "name": "tang_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "2",
"reference_set_id": "1"
}, },
{ "name": "leaf_distance_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "4",
"reference_set_id": "1"
}, },
{ "name": "label_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "8",
"reference_set_id": "1"
}, },
{ "name": "degree_histogram_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "16",
"reference_set_id": "1"
}, },
{ "name": "binary_branches_join", { "name": "guha_rsb_join",
"verification_algorithm" : "APTED", "verification_algorithm" : "APTED",
"upperbound": "none" "reference_set_size": "32",
"reference_set_id": "1"
} }
] ]
} }
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
6.0, 7.0
],
"algorithms": [
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "2"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "2"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
8.0, 9.0
],
"algorithms": [
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "3"
},
{ "name": "guha_rsb_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "3"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
10.0, 11.0
],
"algorithms": [
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "2",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "4",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "8",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "16",
"reference_set_id": "4"
},
{ "name": "guha_rsc_join",
"verification_algorithm" : "APTED",
"reference_set_size": "32",
"reference_set_id": "4"
}
]
}
\ No newline at end of file
{
"datasets": [
"sentiment/sentiment_sorted.bracket"
],
"thresholds": [
12.0, 13.0
],
"algorithms"