Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mateusz Pawlik
ted-experiments
Commits
8ef9c151
Commit
8ef9c151
authored
Jan 10, 2019
by
Mateusz Pawlik
Browse files
Added Guha to experiments.
parent
78023e35
Changes
5
Hide whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
8ef9c151
...
...
@@ -71,6 +71,8 @@ target_include_directories(
external/tree-similarity/src/parser
external/tree-similarity/src/ted
external/tree-similarity/src/ted_ub
external/tree-similarity/src/ted_lb
external/tree-similarity/src/join/guha
)
# Let the compiler know to find the header files in TreeSimilarity library.
...
...
db/create_db.sql
View file @
8ef9c151
...
...
@@ -250,6 +250,31 @@ CREATE TABLE histogram_join (
upperbound_pruned
bigint
-- Number of pairs in the result set by upperbound computation.
);
-- Result table for the Guha join experiments. The table is dropped and
-- recreated, so any previously stored guha_join rows are discarded.
DROP TABLE IF EXISTS guha_join;
CREATE TABLE guha_join (
  -- Common attributes.
  execution_id                serial PRIMARY KEY,
  experiments_version         varchar(127),
  experiments_timestamp       timestamp,
  hostname                    varchar(127),
  dataset_filename            varchar(127) REFERENCES dataset(filename),
  dataset_parsing_time        bigint,
  algorithm_version           varchar(127),
  threshold                   decimal,
  join_result_size            bigint,
  -- Algorithm-specific attributes.
  -- Column names below match the keys printed by execute_guha_join in
  -- src/join_algs/join_algs_experiments.cc.
  -- NOTE(review): the binary prints the *_time values via getfloat();
  -- confirm the unit and that storing them as bigint is intended.
  verification_algorithm      varchar(31),
  vectors_time                bigint, -- Reference set selection and vector computation time.
  candidates_time             bigint, -- Candidate retrieval time.
  ted_verification_candidates bigint, -- Number of candidate pairs passed to verification.
  verification_time           bigint, -- TED verification time.
  l_t_candidates              bigint,
  sed_candidates              bigint,
  u_t_result_pairs            bigint,
  cted_result_pairs           bigint,
  reference_set_size          int     -- Requested size of the random reference set.
);
-- Parameters of a ted experiment (for normalization):
-- ted_experiment_timestamp timestamp,
...
...
src/join_algs/join_algs_experiments.cc
View file @
8ef9c151
...
...
@@ -968,6 +968,81 @@ void execute_tang_join(std::vector<node::Node<Label>>& trees_collection,
std
::
cout
<<
"
\"
optimum_time
\"
: "
<<
optimum
->
getfloat
()
<<
"}"
<<
std
::
endl
;
}
// Runs the Guha join on `trees_collection` and writes its statistics to
// standard output.
//
// The join is executed in three timed phases:
//   1. "Vectors": draw a random reference set of the requested size and
//      compute the vectors of all trees against it.
//   2. "RetrieveCandidates": collect the candidate pairs; pairs may already
//      be added to the join result in this phase.
//   3. "Verify": verify the remaining candidates.
//
// Output: a sequence of `"key": value` pairs followed by `}` and a newline,
// in this order: vectors_time, candidates_time, ted_verification_candidates,
// l_t_candidates, sed_candidates, u_t_result_pairs, cted_result_pairs,
// verification_time, join_result_size. The opening `{` is not written here;
// presumably the caller emits it before calling this function.
//
// Template parameters:
//   Label                 - label type of the tree nodes.
//   CostModel             - cost model handed to the join algorithm.
//   VerificationAlgorithm - algorithm used by the join for verification.
//
// Parameters:
//   trees_collection   - the trees to be joined.
//   distance_threshold - distance threshold of the join.
//   reference_set_size - number of trees requested for the reference set.
template <typename Label, typename CostModel, typename VerificationAlgorithm>
void execute_guha_join(std::vector<node::Node<Label>>& trees_collection,
                       double distance_threshold,
                       unsigned int reference_set_size) {
  // Join algorithm, timing registry and the result set shared by all phases.
  join::Guha<Label, CostModel, VerificationAlgorithm> guha_join;
  Timing timing;
  std::vector<join::JoinResultElement> join_result;

  // Phase 1: reference set and vectors.
  Timing::Interval* vectors_interval = timing.create_enroll("Vectors");
  vectors_interval->start();
  std::vector<unsigned int> reference_set =
      guha_join.get_random_reference_set(trees_collection, reference_set_size);
  // One vector per tree with one entry per reference tree.
  std::vector<std::vector<double>> ted_vectors(
      trees_collection.size(), std::vector<double>(reference_set.size()));
  guha_join.compute_vectors(trees_collection, reference_set, ted_vectors);
  vectors_interval->stop();
  std::cout << "\"vectors_time\": " << vectors_interval->getfloat() << ", ";

  // Phase 2: retrieve the join candidates.
  Timing::Interval* candidates_interval =
      timing.create_enroll("RetrieveCandidates");
  candidates_interval->start();
  std::vector<std::pair<unsigned int, unsigned int>> join_candidates;
  guha_join.retrieve_candidates(trees_collection, join_candidates, join_result,
                                distance_threshold, reference_set, ted_vectors);
  candidates_interval->stop();
  std::cout << "\"candidates_time\": " << candidates_interval->getfloat()
            << ", ";

  // Counters are reported before verification starts, so they reflect the
  // state right after candidate retrieval.
  std::cout << "\"ted_verification_candidates\": " << join_candidates.size()
            << ", ";
  std::cout << "\"l_t_candidates\": " << guha_join.get_l_t_candidates()
            << ", ";
  std::cout << "\"sed_candidates\": " << guha_join.get_sed_candidates()
            << ", ";
  std::cout << "\"u_t_result_pairs\": " << guha_join.get_u_t_result_pairs()
            << ", ";
  std::cout << "\"cted_result_pairs\": " << guha_join.get_cted_result_pairs()
            << ", ";

  // Phase 3: verify the candidates and complete the join result.
  Timing::Interval* verify_interval = timing.create_enroll("Verify");
  verify_interval->start();
  guha_join.verify_candidates(trees_collection, join_candidates, join_result,
                              distance_threshold, ted_vectors);
  verify_interval->stop();
  std::cout << "\"verification_time\": " << verify_interval->getfloat()
            << ", ";

  // Last key; closes the object and flushes the line.
  std::cout << "\"join_result_size\": " << join_result.size() << "}"
            << std::endl;

  // No "Optimum" interval is enrolled here: the previous version started one
  // after the output was written but never stopped or reported it.
}
int
main
(
int
argc
,
char
**
argv
)
{
using
Label
=
label
::
StringLabel
;
using
CostModel
=
cost_model
::
UnitCostModel
<
Label
>
;
...
...
@@ -1079,6 +1154,15 @@ int main(int argc, char** argv) {
}
else
if
(
argv
[
4
]
==
std
::
string
(
"APTED"
))
{
execute_histogram_join
<
Label
,
CostModel
,
APTED
>
(
trees_collection
,
upperbound
,
distance_threshold
);
}
}
else
if
(
argv
[
3
]
==
std
::
string
(
"guha_join"
))
{
unsigned
int
reference_set_size
=
std
::
stoi
(
argv
[
7
]);
if
(
argv
[
4
]
==
std
::
string
(
"ZhangShasha"
))
{
execute_guha_join
<
Label
,
CostModel
,
ZhangShasha
>
(
trees_collection
,
distance_threshold
,
reference_set_size
);
}
else
if
(
argv
[
4
]
==
std
::
string
(
"Touzet"
))
{
execute_guha_join
<
Label
,
CostModel
,
Touzet
>
(
trees_collection
,
distance_threshold
,
reference_set_size
);
}
else
if
(
argv
[
4
]
==
std
::
string
(
"APTED"
))
{
execute_guha_join
<
Label
,
CostModel
,
APTED
>
(
trees_collection
,
distance_threshold
,
reference_set_size
);
}
}
return
0
;
...
...
src/join_algs/join_algs_experiments.h
View file @
8ef9c151
...
...
@@ -52,5 +52,6 @@
#include
"touzet.h"
#include
"greedy_ub.h"
#include
"apted.h"
#include
"guha.h"
#endif // JOIN_ALGS_EXPERIMENTS_H
src/join_algs/join_algs_experiments.py
View file @
8ef9c151
...
...
@@ -139,6 +139,12 @@ def main():
"verification_algorithm"
:
a
[
'verification_algorithm'
]
}
cmd
.
extend
((
binary_name
,
d
,
str
(
t
),
a
[
'name'
],
a
[
'verification_algorithm'
],
""
,
a
[
'upperbound'
]))
elif
a
[
'name'
]
==
'guha_join'
:
algorithm_params
=
{
"verification_algorithm"
:
a
[
'verification_algorithm'
],
"reference_set_size"
:
a
[
'reference_set_size'
]
}
cmd
.
extend
((
binary_name
,
d
,
str
(
t
),
a
[
'name'
],
a
[
'verification_algorithm'
],
""
,
""
,
a
[
'reference_set_size'
]))
cmd_output
=
get_stdout_cmd
(
cmd
).
strip
()
result_data
=
json
.
loads
(
cmd_output
.
decode
(
'utf-8'
))
result_data
.
update
(
fixed_values
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment