Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mateusz Pawlik
ted-experiments
Commits
5b39e5f2
Commit
5b39e5f2
authored
Jan 11, 2019
by
Mateusz Pawlik
Browse files
Including new Guha versions.
parent
1ac6e65d
Changes
8
Hide whitespace changes
Inline
Side-by-side
configs/icde2019_revision/dblp_10k_t1_guha_rsb.json
0 → 100644
View file @
5b39e5f2
{
"datasets"
:
[
"dblp/dblp_10000_sorted.bracket"
],
"thresholds"
:
[
1.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"64"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"128"
,
"reference_set_id"
:
"-1"
}
]
}
\ No newline at end of file
configs/icde2019_revision/dblp_10k_t1_guha_rsc.json
0 → 100644
View file @
5b39e5f2
{
"datasets"
:
[
"dblp/dblp_10000_sorted.bracket"
],
"thresholds"
:
[
1.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"64"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"128"
,
"reference_set_id"
:
"-1"
}
]
}
\ No newline at end of file
configs/icde2019_revision/dblp_10k_t2_guha_rsb.json
0 → 100644
View file @
5b39e5f2
{
"datasets"
:
[
"dblp/dblp_10000_sorted.bracket"
],
"thresholds"
:
[
2.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"64"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsb_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"128"
,
"reference_set_id"
:
"-1"
}
]
}
\ No newline at end of file
configs/icde2019_revision/dblp_10k_t2_guha_rsc.json
0 → 100644
View file @
5b39e5f2
{
"datasets"
:
[
"dblp/dblp_10000_sorted.bracket"
],
"thresholds"
:
[
2.0
],
"algorithms"
:
[
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"2"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"4"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"8"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"16"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"32"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"64"
,
"reference_set_id"
:
"-1"
},
{
"name"
:
"guha_rsc_join"
,
"verification_algorithm"
:
"APTED"
,
"reference_set_size"
:
"128"
,
"reference_set_id"
:
"-1"
}
]
}
\ No newline at end of file
db/create_db.sql
View file @
5b39e5f2
...
...
@@ -250,8 +250,8 @@ CREATE TABLE histogram_join (
upperbound_pruned
bigint
-- Number of pairs in the result set by upperbound computation.
);
DROP
TABLE
IF
EXISTS
guha_join
;
CREATE
TABLE
guha_join
(
DROP
TABLE
IF
EXISTS
guha_
rsb_
join
;
CREATE
TABLE
guha_
rsb_
join
(
-- Common attributes.
execution_id
serial
PRIMARY
KEY
,
experiments_version
varchar
(
127
),
...
...
@@ -265,7 +265,34 @@ CREATE TABLE guha_join (
-- Algorithm-specific attributes.
verification_algorithm
varchar
(
31
),
vectors_time
bigint
,
candidates_time
bigint
,
metric_candidates_time
bigint
,
sc_candidates_time
bigint
,
ted_verification_candidates
bigint
,
verification_time
bigint
,
-- TED verification time.
l_t_candidates
bigint
,
sed_candidates
bigint
,
u_t_result_pairs
bigint
,
cted_result_pairs
bigint
,
reference_set_size
int
);
DROP
TABLE
IF
EXISTS
guha_rsc_join
;
CREATE
TABLE
guha_rsc_join
(
-- Common attributes.
execution_id
serial
PRIMARY
KEY
,
experiments_version
varchar
(
127
),
experiments_timestamp
timestamp
,
hostname
varchar
(
127
),
dataset_filename
varchar
(
127
)
REFERENCES
dataset
(
filename
),
dataset_parsing_time
bigint
,
algorithm_version
varchar
(
127
),
threshold
decimal
,
join_result_size
bigint
,
-- Algorithm-specific attributes.
verification_algorithm
varchar
(
31
),
vectors_time
bigint
,
metric_candidates_time
bigint
,
sc_candidates_time
bigint
,
ted_verification_candidates
bigint
,
verification_time
bigint
,
-- TED verification time.
l_t_candidates
bigint
,
...
...
src/join_algs/join_algs_experiments.cc
View file @
5b39e5f2
...
...
@@ -969,8 +969,8 @@ void execute_tang_join(std::vector<node::Node<Label>>& trees_collection,
}
template
<
typename
Label
,
typename
CostModel
,
typename
VerificationAlgorithm
>
void
execute_guha_join
(
std
::
vector
<
node
::
Node
<
Label
>>&
trees_collection
,
double
distance_threshold
,
unsigned
int
reference_set_size
)
{
void
execute_guha_
rsb_
join
(
std
::
vector
<
node
::
Node
<
Label
>>&
trees_collection
,
double
distance_threshold
,
unsigned
int
reference_set_size
,
int
reference_set_id
)
{
// Initialize join algorithm
join
::
Guha
<
Label
,
CostModel
,
VerificationAlgorithm
>
guha_join
;
Timing
timing
;
...
...
@@ -983,10 +983,16 @@ void execute_guha_join(std::vector<node::Node<Label>>& trees_collection,
// Start timing
tree_to_set
->
start
();
// Get a random reference set.
std
::
vector
<
unsigned
int
>
reference_set
=
guha_join
.
get_random_reference_set
(
trees_collection
,
reference_set_size
);
// In case there is no reference set, get a random reference set.
std
::
vector
<
unsigned
int
>
reference_set
;
if
(
reference_set_id
==
-
1
)
{
reference_set
=
guha_join
.
get_random_reference_set
(
trees_collection
,
reference_set_size
);
}
else
{
auto
first
=
reference_sets_
[
reference_set_id
].
begin
();
auto
last
=
reference_sets_
[
reference_set_id
].
begin
()
+
reference_set_size
;
reference_set
=
std
::
vector
<
unsigned
int
>
(
first
,
last
);
}
// Initialize vectors.
std
::
vector
<
std
::
vector
<
double
>>
ted_vectors
(
trees_collection
.
size
(),
std
::
vector
<
double
>
(
reference_set
.
size
()));
// Compute the vectors.
...
...
@@ -999,20 +1005,101 @@ void execute_guha_join(std::vector<node::Node<Label>>& trees_collection,
std
::
cout
<<
"
\"
vectors_time
\"
: "
<<
tree_to_set
->
getfloat
()
<<
", "
;
{
Timing
::
Interval
*
retMetricCand
=
timing
.
create_enroll
(
"RetrieveMetricCandidates"
);
retMetricCand
->
start
();
std
::
vector
<
std
::
pair
<
unsigned
int
,
unsigned
int
>>
join_candidates
;
guha_join
.
retrieve_metric_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
,
ted_vectors
);
retMetricCand
->
stop
();
Timing
::
Interval
*
retSCCand
=
timing
.
create_enroll
(
"RetrieveSCCandidates"
);
retSCCand
->
start
();
guha_join
.
retrieve_sc_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
);
retSCCand
->
stop
();
// Write timing
std
::
cout
<<
"
\"
metric_candidates_time
\"
: "
<<
retMetricCand
->
getfloat
()
<<
", "
;
std
::
cout
<<
"
\"
sc_candidates_time
\"
: "
<<
retSCCand
->
getfloat
()
<<
", "
;
std
::
cout
<<
"
\"
ted_verification_candidates
\"
: "
<<
join_candidates
.
size
()
<<
", "
;
std
::
cout
<<
"
\"
l_t_candidates
\"
: "
<<
guha_join
.
get_l_t_candidates
()
<<
", "
;
std
::
cout
<<
"
\"
sed_candidates
\"
: "
<<
guha_join
.
get_sed_candidates
()
<<
", "
;
std
::
cout
<<
"
\"
u_t_result_pairs
\"
: "
<<
guha_join
.
get_u_t_result_pairs
()
<<
", "
;
std
::
cout
<<
"
\"
cted_result_pairs
\"
: "
<<
guha_join
.
get_cted_result_pairs
()
<<
", "
;
// Initialized Timing object
Timing
::
Interval
*
retCand
=
timing
.
create_enroll
(
"
RetrieveCandidates
"
);
Timing
::
Interval
*
verify
=
timing
.
create_enroll
(
"
Verify
"
);
// Start timing
retCand
->
start
();
verify
->
start
();
// Retrieve candidates for tjoin's candidate index
std
::
vector
<
std
::
pair
<
unsigned
int
,
unsigned
int
>>
join_candidates
;
guha_join
.
retrieve_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
,
reference_set
,
ted_vectors
);
// Verify all computed join candidates and return the join result
guha_join
.
verify_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
);
// Stop timing
retCand
->
stop
();
verify
->
stop
();
// Write timing
std
::
cout
<<
"
\"
verification_time
\"
: "
<<
verify
->
getfloat
()
<<
", "
;
}
std
::
cout
<<
"
\"
join_result_size
\"
: "
<<
join_result
.
size
()
<<
"}"
<<
std
::
endl
;
}
// Calculate optimum by verify only the resultset
// Initialized Timing object
Timing
::
Interval
*
optimum
=
timing
.
create_enroll
(
"Optimum"
);
// Start timing
optimum
->
start
();
}
template
<
typename
Label
,
typename
CostModel
,
typename
VerificationAlgorithm
>
void
execute_guha_rsc_join
(
std
::
vector
<
node
::
Node
<
Label
>>&
trees_collection
,
double
distance_threshold
,
unsigned
int
reference_set_size
,
int
reference_set_id
)
{
// Initialize join algorithm
join
::
Guha
<
Label
,
CostModel
,
VerificationAlgorithm
>
guha_join
;
Timing
timing
;
std
::
vector
<
join
::
JoinResultElement
>
join_result
;
// Add some scopes to ensure that the memory is deallocated
{
// Initialized Timing object
Timing
::
Interval
*
tree_to_set
=
timing
.
create_enroll
(
"Vectors"
);
// Start timing
tree_to_set
->
start
();
// In case there is no reference set, get a random reference set.
std
::
vector
<
unsigned
int
>
reference_set
;
if
(
reference_set_id
==
-
1
)
{
reference_set
=
guha_join
.
get_random_reference_set
(
trees_collection
,
reference_set_size
);
}
else
{
auto
first
=
reference_sets_
[
reference_set_id
].
begin
();
auto
last
=
reference_sets_
[
reference_set_id
].
begin
()
+
reference_set_size
;
reference_set
=
std
::
vector
<
unsigned
int
>
(
first
,
last
);
}
// Initialize vectors.
std
::
vector
<
std
::
vector
<
double
>>
lb_vectors
(
trees_collection
.
size
(),
std
::
vector
<
double
>
(
reference_set
.
size
()));
std
::
vector
<
std
::
vector
<
double
>>
ub_vectors
(
trees_collection
.
size
(),
std
::
vector
<
double
>
(
reference_set
.
size
()));
// Compute the vectors.
guha_join
.
compute_vectors
(
trees_collection
,
reference_set
,
lb_vectors
,
ub_vectors
);
// Stop timing
tree_to_set
->
stop
();
// Write timing
std
::
cout
<<
"
\"
vectors_time
\"
: "
<<
tree_to_set
->
getfloat
()
<<
", "
;
{
Timing
::
Interval
*
retMetricCand
=
timing
.
create_enroll
(
"RetrieveMetricCandidates"
);
retMetricCand
->
start
();
std
::
vector
<
std
::
pair
<
unsigned
int
,
unsigned
int
>>
join_candidates
;
guha_join
.
retrieve_metric_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
,
lb_vectors
,
ub_vectors
);
retMetricCand
->
stop
();
Timing
::
Interval
*
retSCCand
=
timing
.
create_enroll
(
"RetrieveSCCandidates"
);
retSCCand
->
start
();
guha_join
.
retrieve_sc_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
);
retSCCand
->
stop
();
// Write timing
std
::
cout
<<
"
\"
candidates_time
\"
: "
<<
retCand
->
getfloat
()
<<
", "
;
std
::
cout
<<
"
\"
metric_candidates_time
\"
: "
<<
retMetricCand
->
getfloat
()
<<
", "
;
std
::
cout
<<
"
\"
sc_candidates_time
\"
: "
<<
retSCCand
->
getfloat
()
<<
", "
;
std
::
cout
<<
"
\"
ted_verification_candidates
\"
: "
<<
join_candidates
.
size
()
<<
", "
;
std
::
cout
<<
"
\"
l_t_candidates
\"
: "
<<
guha_join
.
get_l_t_candidates
()
<<
", "
;
std
::
cout
<<
"
\"
sed_candidates
\"
: "
<<
guha_join
.
get_sed_candidates
()
<<
", "
;
...
...
@@ -1025,7 +1112,7 @@ void execute_guha_join(std::vector<node::Node<Label>>& trees_collection,
verify
->
start
();
// Verify all computed join candidates and return the join result
guha_join
.
verify_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
,
ted_vectors
);
guha_join
.
verify_candidates
(
trees_collection
,
join_candidates
,
join_result
,
distance_threshold
);
// Stop timing
verify
->
stop
();
...
...
@@ -1050,6 +1137,22 @@ int main(int argc, char** argv) {
using
Touzet
=
ted
::
Touzet
<
Label
,
CostModel
>
;
using
APTED
=
ted
::
APTED
<
Label
,
CostModel
>
;
// [0] Sentiment tau = 1:
reference_sets_
.
push_back
({
18
,
11
,
2
,
33
,
20
,
293
,
91
,
65
,
6426
,
5113
,
1338
,
1257
,
317
,
258
,
254
,
252
,
237
,
229
,
137
,
111
,
86
,
81
,
44
,
41
,
28
,
14
});
// [1] Sentiment tau = 2:
reference_sets_
.
push_back
({
12
,
57
,
105
,
96
,
74
,
92
,
49
,
284
,
127
,
37
,
302
,
158
,
136
,
133
,
86
,
34
,
296
,
293
,
173
,
156
,
138
,
53
,
6426
,
5113
,
1338
,
1257
,
462
,
447
,
431
,
358
,
318
,
299
,
285
,
245
,
278
,
272
,
259
,
254
,
251
,
223
,
236
,
228
,
221
,
203
,
164
,
144
,
128
,
119
,
117
,
114
,
112
,
81
,
51
});
// [2] Sentiment tau = 3:
reference_sets_
.
push_back
({
19
,
55
,
80
,
295
,
286
,
93
,
49
,
123
,
161
,
175
,
220
,
224
,
156
,
146
,
272
,
194
,
260
,
423
,
321
,
293
,
268
,
244
,
227
,
221
,
204
,
200
,
108
,
86
,
6426
,
5113
,
1338
,
1258
,
763
,
786
,
671
,
373
,
443
,
470
,
466
,
462
,
446
,
440
,
430
,
419
,
405
,
393
,
329
,
359
,
354
,
327
,
317
,
259
,
251
,
237
,
232
,
214
,
210
,
206
,
181
,
180
,
56
});
// [3] Sentiment tau = 4:
reference_sets_
.
push_back
({
40
,
228
,
177
,
296
,
251
,
236
,
194
,
155
,
371
,
43
,
669
,
282
,
384
,
723
,
459
,
624
,
358
,
446
,
134
,
323
,
691
,
329
,
474
,
402
,
609
,
317
,
328
,
235
,
212
,
213
,
260
,
6426
,
5113
,
1338
,
1258
,
763
,
786
,
737
,
728
,
716
,
680
,
672
,
601
,
546
,
438
,
468
,
448
,
439
,
431
,
412
,
330
,
362
,
349
,
315
,
326
,
311
,
307
,
290
,
207
,
69
,
1232
,
722
,
599
,
511
,
475
,
461
});
// [4] Sentiment tau = 5:
reference_sets_
.
push_back
({
40
,
144
,
290
,
354
,
346
,
377
,
565
,
362
,
458
,
438
,
467
,
225
,
723
,
452
,
550
,
457
,
398
,
695
,
672
,
329
,
602
,
510
,
426
,
390
,
393
,
311
,
163
,
871
,
739
,
769
,
730
,
686
,
615
,
359
,
324
,
416
,
406
,
410
,
402
,
330
,
340
,
535
,
6426
,
5113
,
1841
,
809
,
1337
,
1257
,
1163
,
763
,
722
,
940
,
842
,
786
,
479
,
724
,
705
,
598
,
620
,
569
,
546
,
537
,
464
,
421
,
322
,
412
,
401
,
369
,
364
,
1204
,
948
,
731
,
706
,
699
,
675
,
555
,
529
,
492
,
462
});
// [5] Sentiment tau = 6:
reference_sets_
.
push_back
({
81
,
424
,
546
,
720
,
510
,
624
,
592
,
311
,
907
,
529
,
728
,
640
,
344
,
770
,
472
,
938
,
725
,
454
,
396
,
714
,
500
,
970
,
486
,
568
,
549
,
305
,
867
,
689
,
673
,
1258
,
871
,
974
,
943
,
810
,
480
,
693
,
667
,
427
,
575
,
453
,
648
,
483
,
6425
,
5113
,
1837
,
1458
,
1360
,
1337
,
1322
,
1227
,
1185
,
1046
,
1037
,
885
,
918
,
864
,
855
,
816
,
471
,
705
,
699
,
637
,
629
,
591
,
559
,
524
,
496
,
1510
,
1204
,
844
,
1099
,
710
,
948
,
917
,
784
,
764
,
704
,
675
,
603
,
585
});
// [0] Sentiment tau = 7:
reference_sets_
.
push_back
({
81
,
462
,
677
,
348
,
622
,
659
,
733
,
801
,
520
,
911
,
483
,
478
,
649
,
772
,
616
,
504
,
820
,
487
,
1905
,
756
,
809
,
946
,
938
,
915
,
1511
,
502
,
932
,
866
,
707
,
607
,
1837
,
1111
,
1350
,
1227
,
952
,
1103
,
670
,
815
,
791
,
703
,
528
,
482
,
1187
,
641
,
6426
,
5113
,
1778
,
1458
,
1337
,
1054
,
1232
,
1176
,
1193
,
1191
,
1161
,
851
,
1030
,
763
,
1022
,
896
,
943
,
888
,
686
,
786
,
788
,
577
,
628
,
571
,
526
,
503
,
1621
,
1523
,
957
,
825
,
936
,
916
,
917
,
856
,
840
,
813
,
807
,
518
,
784
,
765
,
742
,
652
,
618
,
613
,
603
,
589
});
Timing
timing
;
// Write results as JSON object to stdout
...
...
@@ -1154,14 +1257,25 @@ int main(int argc, char** argv) {
}
else
if
(
argv
[
4
]
==
std
::
string
(
"APTED"
))
{
execute_histogram_join
<
Label
,
CostModel
,
APTED
>
(
trees_collection
,
upperbound
,
distance_threshold
);
}
}
else
if
(
argv
[
3
]
==
std
::
string
(
"guha_join"
))
{
}
else
if
(
argv
[
3
]
==
std
::
string
(
"guha_rsb_join"
))
{
unsigned
int
reference_set_size
=
std
::
stoi
(
argv
[
7
]);
int
reference_set_id
=
std
::
stoi
(
argv
[
8
]);
if
(
argv
[
4
]
==
std
::
string
(
"ZhangShasha"
))
{
execute_guha_rsb_join
<
Label
,
CostModel
,
ZhangShasha
>
(
trees_collection
,
distance_threshold
,
reference_set_size
,
reference_set_id
);
}
else
if
(
argv
[
4
]
==
std
::
string
(
"Touzet"
))
{
execute_guha_rsb_join
<
Label
,
CostModel
,
Touzet
>
(
trees_collection
,
distance_threshold
,
reference_set_size
,
reference_set_id
);
}
else
if
(
argv
[
4
]
==
std
::
string
(
"APTED"
))
{
execute_guha_rsb_join
<
Label
,
CostModel
,
APTED
>
(
trees_collection
,
distance_threshold
,
reference_set_size
,
reference_set_id
);
}
}
else
if
(
argv
[
3
]
==
std
::
string
(
"guha_rsc_join"
))
{
unsigned
int
reference_set_size
=
std
::
stoi
(
argv
[
7
]);
int
reference_set_id
=
std
::
stoi
(
argv
[
8
]);
if
(
argv
[
4
]
==
std
::
string
(
"ZhangShasha"
))
{
execute_guha_join
<
Label
,
CostModel
,
ZhangShasha
>
(
trees_collection
,
distance_threshold
,
reference_set_size
);
execute_guha_
rsc_
join
<
Label
,
CostModel
,
ZhangShasha
>
(
trees_collection
,
distance_threshold
,
reference_set_size
,
reference_set_id
);
}
else
if
(
argv
[
4
]
==
std
::
string
(
"Touzet"
))
{
execute_guha_join
<
Label
,
CostModel
,
Touzet
>
(
trees_collection
,
distance_threshold
,
reference_set_size
);
execute_guha_
rsc_
join
<
Label
,
CostModel
,
Touzet
>
(
trees_collection
,
distance_threshold
,
reference_set_size
,
reference_set_id
);
}
else
if
(
argv
[
4
]
==
std
::
string
(
"APTED"
))
{
execute_guha_join
<
Label
,
CostModel
,
APTED
>
(
trees_collection
,
distance_threshold
,
reference_set_size
);
execute_guha_
rsc_
join
<
Label
,
CostModel
,
APTED
>
(
trees_collection
,
distance_threshold
,
reference_set_size
,
reference_set_id
);
}
}
...
...
src/join_algs/join_algs_experiments.h
View file @
5b39e5f2
...
...
@@ -54,4 +54,6 @@
#include
"apted.h"
#include
"guha.h"
std
::
vector
<
std
::
vector
<
unsigned
int
>>
reference_sets_
;
#endif // JOIN_ALGS_EXPERIMENTS_H
src/join_algs/join_algs_experiments.py
View file @
5b39e5f2
...
...
@@ -139,10 +139,18 @@ def main():
"verification_algorithm"
:
a
[
'verification_algorithm'
]
}
cmd
.
extend
((
binary_name
,
d
,
str
(
t
),
a
[
'name'
],
a
[
'verification_algorithm'
],
""
,
a
[
'upperbound'
]))
elif
a
[
'name'
]
==
'guha_join'
:
elif
a
[
'name'
]
==
'guha_
rsb_
join'
:
algorithm_params
=
{
"verification_algorithm"
:
a
[
'verification_algorithm'
],
"reference_set_size"
:
a
[
'reference_set_size'
]
"reference_set_size"
:
a
[
'reference_set_size'
],
"reference_set_id"
:
a
[
'reference_set_id'
]
}
cmd
.
extend
((
binary_name
,
d
,
str
(
t
),
a
[
'name'
],
a
[
'verification_algorithm'
],
""
,
""
,
a
[
'reference_set_size'
]))
elif
a
[
'name'
]
==
'guha_rsc_join'
:
algorithm_params
=
{
"verification_algorithm"
:
a
[
'verification_algorithm'
],
"reference_set_size"
:
a
[
'reference_set_size'
],
"reference_set_id"
:
a
[
'reference_set_id'
]
}
cmd
.
extend
((
binary_name
,
d
,
str
(
t
),
a
[
'name'
],
a
[
'verification_algorithm'
],
""
,
""
,
a
[
'reference_set_size'
]))
cmd_output
=
get_stdout_cmd
(
cmd
).
strip
()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment