Commit 866f916f authored by Thomas Huetter's avatar Thomas Huetter
Browse files

/src/experiments: with this commit, simple experiments can be executed

parent 96fd6058
......@@ -41,9 +41,15 @@ add_library(
)
# Header directories of the tree-similarity sources. They are declared with
# INTERFACE scope on the TreeSimilarity target; each directory is listed
# exactly once, in alphabetical order.
target_include_directories(
  TreeSimilarity INTERFACE
  external/tree-similarity-private/src/allpairs
  external/tree-similarity-private/src/cost_model
  external/tree-similarity-private/src/data_structures
  external/tree-similarity-private/src/join
  external/tree-similarity-private/src/label
  external/tree-similarity-private/src/node
  external/tree-similarity-private/src/parser
  external/tree-similarity-private/src/tree_to_set_converter
  external/tree-similarity-private/src/zhang_shasha
)
# Tell the compiler where to find the header files of the TreeSimilarity library.
......
#include "experiments.h"
/// Entry point of the tree similarity self-join experiments.
///
/// Reads a collection of trees from the input file, runs the selected
/// self-join algorithm(s) with the given similarity threshold, and prints the
/// candidate/result set sizes followed by the collected timing statistics to
/// standard output.
///
/// Command line arguments:
///   argv[1]  path to the input file containing the trees
///   argv[2]  similarity threshold (parsed with std::stod)
///   argv[3]  algorithm selector: "a" (all), "b" (allpairs baseline),
///            "n" (naive join)
///
/// @return 0 on success, -1 if the number of arguments is wrong or the input
///         fails validation.
int main(int argc, char** argv) {
  if (argc != 4) {
    std::cout << "Usage: ./experiments <input_file> <similarity_threshold> <algorithm>"
        << std::endl;
    std::cout << "Algorithm:" << std::endl;
    std::cout << "a ... all algorithms" << std::endl << "b ... allpairs baseline" << std::endl <<
        "n ... naive join" << std::endl;
    return -1;
  }

  using Label = label::StringLabel;
  using CostModel = cost_model::UnitCostModel<Label>;

  // Collects all intervals measured below; printed once at the very end.
  Timing timing;

  // Path to the file containing the input trees.
  std::string file_path = argv[1];

  // Similarity threshold - maximum number of allowed edit operations.
  double similarity_threshold = std::stod(argv[2]);

  // Algorithm selector, read once instead of rebuilding it for every check.
  const std::string algorithm = argv[3];

  // Container that stores all parsed trees.
  std::vector<node::Node<Label>> trees_collection;

  ////////////////////////////////////////////////////////////////////////
  /// PARSE INPUT
  ////////////////////////////////////////////////////////////////////////

  Timing::Interval* parse = timing.create_enroll("Parse");
  parse->start();

  parser::BracketNotationParser bnp;

  // Verify the input format before parsing.
  // NOTE(review): validate_input receives the file path argument, while the
  // error message talks about the brackets of the tree itself - confirm that
  // validate_input expects a path rather than the tree content.
  if (!bnp.validate_input(argv[1])) {
    std::cerr << "Incorrect format of source tree. Is the number of opening and closing brackets equal?" << std::endl;
    return -1;
  }

  // Parse the dataset into the tree collection.
  bnp.parse_collection(trees_collection, file_path);

  parse->stop();

  ////////////////////////////////////////////////////////////////////////
  /// ALLPAIRS BASELINE ALGORITHM
  ////////////////////////////////////////////////////////////////////////

  // Run if the selected algorithm is either "all" or "allpairs baseline".
  if (algorithm == "a" || algorithm == "b") {
    join::AllpairsBaselineSelfJoin<Label, CostModel> absj;

    // Step 1: convert the trees to sets.
    Timing::Interval* tree_to_set = timing.create_enroll("TreeToSet");
    tree_to_set->start();
    std::vector<std::vector<unsigned int>> sets_collection =
        absj.convert_trees_to_sets(trees_collection);
    tree_to_set->stop();

    // Step 2: compute the join candidates with the allpairs algorithm.
    Timing::Interval* allpairs = timing.create_enroll("Allpairs");
    allpairs->start();
    std::vector<std::pair<unsigned int, unsigned int>> join_candidates_absj =
        absj.get_join_candidates(sets_collection, similarity_threshold);
    allpairs->stop();

    std::cout << "#ABSJ: candidates=" << join_candidates_absj.size() << std::endl;

    // Step 3: verify all computed candidates to obtain the join result.
    Timing::Interval* verify = timing.create_enroll("Verify");
    verify->start();
    std::vector<join::JoinResultElement> result_set_absj =
        absj.verify_candidates(trees_collection, join_candidates_absj,
                               similarity_threshold);
    verify->stop();

    std::cout << "ABSJ: #resultset=" << result_set_absj.size() << std::endl;
  }

  ////////////////////////////////////////////////////////////////////////
  /// NAIVE SELF JOIN
  ////////////////////////////////////////////////////////////////////////

  // Run if the selected algorithm is either "all" or "naive join".
  if (algorithm == "a" || algorithm == "n") {
    join::NaiveSelfJoin<Label, CostModel> nsj;

    // Execute the naive self join on the tree collection and time it.
    Timing::Interval* naive_join = timing.create_enroll("NaiveJoin");
    naive_join->start();
    std::vector<join::JoinResultElement> result_set_nsj =
        nsj.execute_join(trees_collection, similarity_threshold);
    naive_join->stop();

    std::cout << "NSJ: #resultset=" << result_set_nsj.size() << std::endl;
  }

  // Output timing statistics of all measured intervals.
  std::cout << timing;

  return 0;
}
......@@ -7,5 +7,11 @@
#include "node.h"
#include "string_label.h"
#include "bracket_notation_parser.h"
#include "allpairs.h"
#include "allpairs_baseline_self_join.h"
#include "naive_self_join.h"
#include "unit_cost_model.h"
#include "tree_to_set_converter.h"
#include "zhang_shasha.h"
#endif // EXPERIMENTS_H
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment