Commit a751b968 authored by Thomas Huetter's avatar Thomas Huetter
Browse files

added upperbound computation and updated sql scripts

parent 964c5bf1
......@@ -50,6 +50,7 @@ target_include_directories(
external/tree-similarity-private/src/parser
external/tree-similarity-private/src/tree_to_set_converter
external/tree-similarity-private/src/ted
external/tree-similarity-private/src/ted_ub
)
# Let the compiler know to find the header files in TreeSimilarity library.
......
......@@ -77,7 +77,9 @@ CREATE TABLE allpairs_self_join (
filter_verification_candidates bigint, -- Pairs of trees resulting from filter only.
filter_time bigint, -- Total time of the filter and its verification step.
verification_candidates bigint, -- Pairs of trees resulting after verification step of the filter.
verification_time bigint -- TED verification time.
verification_time bigint, -- TED verification time.
upperbound_time bigint, -- Time spent to compute upperbounds for verification candidates.
upperbound_pruned bigint -- Number of pairs placed in the result set by the upperbound computation.
);
DROP TABLE IF EXISTS allpairs_multiset_baseline_self_join;
......@@ -103,7 +105,9 @@ CREATE TABLE allpairs_multiset_baseline_self_join (
filter_verification_candidates bigint, -- Pairs of trees resulting from filter only.
filter_time bigint, -- Total time of the filter and its verification step.
verification_candidates bigint, -- Pairs of trees resulting after verification step of the filter.
verification_time bigint -- TED verification time.
verification_time bigint, -- TED verification time.
upperbound_time bigint, -- Time spent to compute upperbounds for verification candidates.
upperbound_pruned bigint -- Number of pairs placed in the result set by the upperbound computation.
);
DROP TABLE IF EXISTS allpairs_multiset_dsf_self_join;
......@@ -129,5 +133,7 @@ CREATE TABLE allpairs_multiset_dsf_self_join (
filter_verification_candidates bigint, -- Pairs of trees resulting from filter only.
filter_time bigint, -- Total time of the filter and its verification step.
verification_candidates bigint, -- Pairs of trees resulting after verification step of the filter.
verification_time bigint -- TED verification time.
verification_time bigint, -- TED verification time.
upperbound_time bigint, -- Time spent to compute upperbounds for verification candidates.
upperbound_pruned bigint -- Number of pairs placed in the result set by the upperbound computation.
);
......@@ -47,6 +47,7 @@ void execute_naive_self_join(std::vector<node::Node<Label>>& trees_collection, d
naive_join->stop();
// Calculate the optimum by verifying only the result set
// Initialize Timing object
Timing::Interval * optimum = timing.create_enroll("Optimum");
......@@ -86,7 +87,7 @@ void execute_naive_self_join(std::vector<node::Node<Label>>& trees_collection, d
template <typename Label, typename CostModel, typename SimilarityFunction, typename VerificationAlgorithm>
void execute_allpairs_self_join(std::vector<node::Node<Label>>& trees_collection, double similarity_threshold) {
// Initialize allpairs baseline
// Initialize allpairs
join::AllpairsGenericSelfJoin<Label, CostModel, SimilarityFunction, VerificationAlgorithm> absj;
Timing timing;
std::vector<join::JoinResultElement> result_set;
......@@ -125,6 +126,33 @@ void execute_allpairs_self_join(std::vector<node::Node<Label>>& trees_collection
std::cout << "\"filter_time\": " << allpairs->getfloat() << ", ";
std::cout << "\"verification_candidates\": " << join_candidates.size() << ", ";
ted_ub::GreedyUB<Label, CostModel> gub;
// Initialize Timing object
Timing::Interval * greedyub = timing.create_enroll("GreedyUB");
// Start timing
greedyub->start();
std::vector<std::pair<unsigned int, unsigned int>>::iterator it = join_candidates.begin();
while(it != join_candidates.end()) {
double ub_value = gub.verify(trees_collection[it->first],
trees_collection[it->second],
similarity_threshold);
if(ub_value <= similarity_threshold) {
result_set.emplace_back(it->first, it->second, ub_value);
it = join_candidates.erase(it);
}
else {
++it;
}
}
// Stop timing
greedyub->stop();
// Write timing
std::cout << "\"upperbound_time\": " << greedyub->getfloat() << ", ";
std::cout << "\"upperbound_pruned\": " << result_set.size() << ", ";
// Initialize Timing object
Timing::Interval * verify = timing.create_enroll("Verify");
......@@ -178,7 +206,7 @@ void execute_allpairs_self_join(std::vector<node::Node<Label>>& trees_collection
template <typename Label, typename CostModel, typename SimilarityFunction, typename VerificationAlgorithm>
void execute_allpairs_multiset_baseline_self_join(std::vector<node::Node<Label>>& trees_collection, double similarity_threshold) {
// Initialize allpairs baseline
// Initialize allpairs multiset baseline
join::AllpairsMultisetBaselineSelfJoin<Label, CostModel, SimilarityFunction, VerificationAlgorithm> absj;
Timing timing;
std::vector<join::JoinResultElement> result_set;
......@@ -217,6 +245,33 @@ void execute_allpairs_multiset_baseline_self_join(std::vector<node::Node<Label>>
std::cout << "\"filter_time\": " << allpairs->getfloat() << ", ";
std::cout << "\"verification_candidates\": " << join_candidates.size() << ", ";
ted_ub::GreedyUB<Label, CostModel> gub;
// Initialize Timing object
Timing::Interval * greedyub = timing.create_enroll("GreedyUB");
// Start timing
greedyub->start();
std::vector<std::pair<unsigned int, unsigned int>>::iterator it = join_candidates.begin();
while(it != join_candidates.end()) {
double ub_value = gub.verify(trees_collection[it->first],
trees_collection[it->second],
similarity_threshold);
if(ub_value <= similarity_threshold) {
result_set.emplace_back(it->first, it->second, ub_value);
it = join_candidates.erase(it);
}
else {
++it;
}
}
// Stop timing
greedyub->stop();
// Write timing
std::cout << "\"upperbound_time\": " << greedyub->getfloat() << ", ";
std::cout << "\"upperbound_pruned\": " << result_set.size() << ", ";
// Initialize Timing object
Timing::Interval * verify = timing.create_enroll("Verify");
......@@ -270,7 +325,7 @@ void execute_allpairs_multiset_baseline_self_join(std::vector<node::Node<Label>>
template <typename Label, typename CostModel, typename SimilarityFunction, typename VerificationAlgorithm>
void execute_allpairs_multiset_dsf_self_join(std::vector<node::Node<Label>>& trees_collection, double similarity_threshold) {
// Initialize allpairs baseline
// Initialize allpairs multiset duplicate
join::AllpairsMultisetDSFSelfJoin<Label, CostModel, SimilarityFunction, VerificationAlgorithm> absj;
Timing timing;
std::vector<join::JoinResultElement> result_set;
......@@ -309,6 +364,33 @@ void execute_allpairs_multiset_dsf_self_join(std::vector<node::Node<Label>>& tre
std::cout << "\"filter_time\": " << allpairs->getfloat() << ", ";
std::cout << "\"verification_candidates\": " << join_candidates.size() << ", ";
ted_ub::GreedyUB<Label, CostModel> gub;
// Initialize Timing object
Timing::Interval * greedyub = timing.create_enroll("GreedyUB");
// Start timing
greedyub->start();
std::vector<std::pair<unsigned int, unsigned int>>::iterator it = join_candidates.begin();
while(it != join_candidates.end()) {
double ub_value = gub.verify(trees_collection[it->first],
trees_collection[it->second],
similarity_threshold);
if(ub_value <= similarity_threshold) {
result_set.emplace_back(it->first, it->second, ub_value);
it = join_candidates.erase(it);
}
else {
++it;
}
}
// Stop timing
greedyub->stop();
// Write timing
std::cout << "\"upperbound_time\": " << greedyub->getfloat() << ", ";
std::cout << "\"upperbound_pruned\": " << result_set.size() << ", ";
// Initialize Timing object
Timing::Interval * verify = timing.create_enroll("Verify");
......@@ -471,4 +553,4 @@ int main(int argc, char** argv) {
}
return 0;
}
\ No newline at end of file
}
......@@ -48,5 +48,6 @@
#include "tree_to_set_converter_multiset_baseline.h"
#include "zhang_shasha.h"
#include "touzet.h"
#include "greedy_ub.h"
#endif // EXPERIMENTS_H
......@@ -122,4 +122,4 @@ for a in data['algorithms']:
result_data.update(experiment_params)
result_data.update(algorithm_params)
store_result(a['name'], result_data)
# print_result(a['name'], result_data)
print_result(a['name'], result_data)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment