souffle  2.0.2-371-g6315b36
btree_multiset_test.cpp
Go to the documentation of this file.
1 /*
2  * Souffle - A Datalog Compiler
3  * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved
4  * Licensed under the Universal Permissive License v 1.0 as shown at:
5  * - https://opensource.org/licenses/UPL
6  * - <souffle root>/licenses/SOUFFLE-UPL.txt
7  */
8 
9 /************************************************************************
10  *
11  * @file btree_multiset_test.cpp
12  *
13  * A test case testing the B-trees utilization as multisets.
14  *
15  ***********************************************************************/
16 
17 #include "tests/test.h"
18 
20 #include <algorithm>
21 #include <chrono>
22 #include <cstdlib>
23 #include <functional>
24 #include <iomanip>
25 #include <iostream>
26 #include <random>
27 #include <set>
28 #include <string>
29 #include <system_error>
30 #include <tuple>
31 #include <unordered_set>
32 #include <vector>
33 
34 namespace std {
35 
36 template <typename A, typename B>
37 struct hash<tuple<A, B>> {
38  std::size_t operator()(const tuple<A, B>& t) const {
39  auto a = std::hash<A>()(get<0>(t));
40  auto b = std::hash<B>()(get<1>(t));
41  // from http://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
42  return a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2));
43  }
44 };
45 
46 template <typename A, typename B>
47 std::ostream& operator<<(std::ostream& out, const tuple<A, B>& t) {
48  return out << "[" << get<0>(t) << "," << get<1>(t) << "]";
49 }
50 } // namespace std
51 
52 namespace souffle {
53 
54 namespace test {
55 
56 TEST(BTreeMultiSet, Basic) {
57  const bool DEBUG = false;
58 
59  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
60 
61  test_set t;
62 
63  EXPECT_EQ(3, test_set::max_keys_per_node);
64 
65  // check initial conditions
66  EXPECT_EQ(0u, t.size());
67  EXPECT_FALSE(t.contains(10));
68  EXPECT_FALSE(t.contains(12));
69  EXPECT_FALSE(t.contains(14));
70  EXPECT_EQ(0, t.getDepth());
71  EXPECT_EQ(0, t.getNumNodes());
72 
73  if (DEBUG) t.printTree();
74 
75  // add an element
76 
77  t.insert(12);
78  if (DEBUG) {
79  t.printTree();
80  std::cout << "\n";
81  }
82 
83  EXPECT_EQ(1u, t.size());
84  EXPECT_FALSE(t.contains(10));
85  EXPECT_TRUE(t.contains(12));
86  EXPECT_FALSE(t.contains(14));
87  EXPECT_EQ(1, t.getDepth());
88  EXPECT_EQ(1, t.getNumNodes());
89 
90  // add a larger element
91 
92  t.insert(14);
93  if (DEBUG) {
94  t.printTree();
95  std::cout << "\n";
96  }
97  EXPECT_EQ(2u, t.size());
98  EXPECT_FALSE(t.contains(10));
99  EXPECT_TRUE(t.contains(12));
100  EXPECT_TRUE(t.contains(14));
101  EXPECT_EQ(1, t.getDepth());
102  EXPECT_EQ(1, t.getNumNodes());
103 
104  // add a smaller element
105 
106  t.insert(10);
107  if (DEBUG) {
108  t.printTree();
109  std::cout << "\n";
110  }
111  EXPECT_EQ(3u, t.size());
112  EXPECT_TRUE(t.contains(10));
113  EXPECT_TRUE(t.contains(12));
114  EXPECT_TRUE(t.contains(14));
115  EXPECT_EQ(1, t.getDepth());
116  EXPECT_EQ(1, t.getNumNodes());
117 
118  // cause a split
119  t.insert(11);
120  if (DEBUG) {
121  t.printTree();
122  std::cout << "\n";
123  }
124  EXPECT_EQ(4u, t.size());
125  EXPECT_TRUE(t.contains(10));
126  EXPECT_TRUE(t.contains(11));
127  EXPECT_TRUE(t.contains(12));
128  EXPECT_TRUE(t.contains(14));
129 
130  if (DEBUG) {
131  t.printTree();
132  std::cout << "\n";
133  }
134 
135  t.insert(12);
136  EXPECT_EQ(5u, t.size());
137  t.insert(12);
138  EXPECT_EQ(6u, t.size());
139  if (DEBUG) {
140  t.printTree();
141  std::cout << "\n";
142  }
143 
144  t.insert(15);
145  if (DEBUG) {
146  t.printTree();
147  std::cout << "\n";
148  }
149 
150  t.insert(16);
151  if (DEBUG) {
152  t.printTree();
153  std::cout << "\n";
154  }
155 }
156 
157 TEST(BTreeMultiSet, Duplicates) {
158  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
159  test_set t;
160  for (int i = 0; i < 10; i++) {
161  t.insert(0);
162  }
163  EXPECT_EQ(10, t.size());
164  std::vector<int> data(t.begin(), t.end());
165  EXPECT_EQ(10, data.size());
166  for (int i = 0; i < 10; i++) {
167  EXPECT_EQ(0, data[i]);
168  }
169 }
170 
171 TEST(BTreeMultiSet, Incremental) {
172  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
173  test_set t;
174  int N = 1000;
175  for (int i = 0; i < N; i++) {
176  t.insert(i);
177  for (int j = 0; j < N; j++) {
178  EXPECT_EQ(j <= i, t.contains(j)) << "i=" << i << ", j=" << j;
179  }
180  }
181 }
182 
183 TEST(BTreeMultiSet, Decremental) {
184  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
185  test_set t;
186  int N = 1000;
187  for (int i = N; i >= 0; i--) {
188  t.insert(i);
189  for (int j = 0; j < N; j++) {
190  EXPECT_EQ(j >= i, t.contains(j)) << "i=" << i << ", j=" << j;
191  }
192  }
193 }
194 
195 TEST(BTreeMultiSet, Shuffled) {
196  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
197 
198  test_set t;
199 
200  int N = 10000;
201 
202  std::vector<int> data;
203  for (int i = 0; i < N; i++) {
204  data.push_back(i);
205  }
206  std::random_device rd;
207  std::mt19937 generator(rd());
208 
209  shuffle(data.begin(), data.end(), generator);
210 
211  for (int i = 0; i < N; i++) {
212  t.insert(data[i]);
213  }
214 
215  for (int i = 0; i < N; i++) {
216  EXPECT_TRUE(t.contains(i)) << "i=" << i;
217  }
218 }
219 
220 TEST(BTreeMultiSet, IteratorEmpty) {
221  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
222  test_set t;
223 
224  EXPECT_EQ(t.begin(), t.end());
225 }
226 
227 TEST(BTreeMultiSet, IteratorBasic) {
228  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
229  test_set t;
230 
231  for (int i = 0; i < 10; i++) {
232  t.insert(i);
233  }
234 
235  // t.printTree();
236 
237  auto it = t.begin();
238  auto e = t.end();
239 
240  EXPECT_NE(it, e);
241 
242  int last = -1;
243  for (int i : t) {
244  EXPECT_EQ(last + 1, i);
245  last = i;
246  }
247  EXPECT_EQ(last, 9);
248 }
249 
250 TEST(BTreeMultiSet, IteratorStress) {
251  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
252 
253  test_set t;
254 
255  int N = 1000;
256 
257  std::vector<int> data;
258  for (int i = 0; i < N; i++) {
259  data.push_back(i);
260  }
261  std::random_device rd;
262  std::mt19937 generator(rd());
263 
264  shuffle(data.begin(), data.end(), generator);
265 
266  for (int i = 0; i < N; i++) {
267  EXPECT_EQ((size_t)i, t.size());
268 
269  int last = -1;
270  for (int i : t) {
271  EXPECT_LT(last, i);
272  last = i;
273  }
274 
275  t.insert(data[i]);
276  }
277 }
278 
279 TEST(BTreeMultiSet, BoundaryTest) {
280  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
281 
282  test_set t;
283 
284  for (int i = 0; i < 10; i++) {
285  t.insert(i);
286  }
287 
288  // t.printTree();
289 
290  auto a = t.lower_bound(5);
291  EXPECT_EQ(5, *a);
292 
293  auto b = t.upper_bound(5);
294  EXPECT_EQ(6, *b);
295 
296  // add duplicates
297 
298  t.insert(5);
299  t.insert(5);
300  t.insert(5);
301 
302  // t.printTree(); std::cout << "\n";
303 
304  // test again ..
305  a = t.lower_bound(5);
306  EXPECT_EQ(5, *a);
307 
308  b = t.upper_bound(5);
309  EXPECT_EQ(6, *b);
310 
311  // check the distance
312  EXPECT_EQ(5, *a);
313  ++a;
314  EXPECT_EQ(5, *a);
315  ++a;
316  EXPECT_EQ(5, *a);
317  ++a;
318  EXPECT_EQ(5, *a);
319  ++a;
320  EXPECT_EQ(6, *a);
321 }
322 
323 TEST(BTreeMultiSet, BoundaryEmpty) {
324  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
325 
326  test_set t;
327 
328  EXPECT_EQ(t.end(), t.lower_bound(5));
329  EXPECT_EQ(t.end(), t.upper_bound(5));
330 
331  t.insert(4);
332 
333  EXPECT_EQ(t.lower_bound(3), t.upper_bound(3));
334  EXPECT_EQ(t.lower_bound(5), t.upper_bound(5));
335 
336  t.insert(6);
337  EXPECT_EQ(t.lower_bound(3), t.upper_bound(3));
338  EXPECT_EQ(t.lower_bound(5), t.upper_bound(5));
339 
340  t.insert(5);
341 
342  EXPECT_EQ(t.lower_bound(3), t.upper_bound(3));
343  EXPECT_NE(t.lower_bound(5), t.upper_bound(5));
344 }
345 
346 TEST(BTreeMultiSet, Load) {
347  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
348 
349  for (int N = 0; N < 100; N++) {
350  // generate some ordered data
351  std::vector<int> data;
352 
353  for (int i = 0; i < N; i++) {
354  data.push_back(i);
355  }
356  auto t = test_set::load(data.begin(), data.end());
357  EXPECT_EQ(data.size(), t.size());
358  EXPECT_TRUE(t.check());
359  int last = -1;
360  for (int c : t) {
361  EXPECT_EQ(last + 1, c);
362  last = c;
363  }
364  EXPECT_EQ(last, N - 1);
365  }
366 }
367 
368 TEST(BTreeMultiSet, Clear) {
369  using test_set = btree_multiset<int, detail::comparator<int>, std::allocator<int>, 16>;
370 
371  test_set t;
372 
373  EXPECT_TRUE(t.empty());
374 
375  t.insert(5);
376 
377  EXPECT_FALSE(t.empty());
378  t.clear();
379  EXPECT_TRUE(t.empty());
380 
381  t.clear();
382  EXPECT_TRUE(t.empty());
383 }
384 
385 using Entry = std::tuple<int, int>;
386 
387 std::vector<Entry> getData(unsigned numEntries) {
388  std::vector<Entry> res(numEntries);
389  int k = 0;
390  for (unsigned i = 0; i < numEntries; i++) {
391  res[k++] = Entry(i / 100, i % 100);
392  }
393  std::random_device rd;
394  std::mt19937 generator(rd());
395 
396  shuffle(res.begin(), res.end(), generator);
397  return res;
398 }
399 
401 
404 }
405 
406 long duration(const time_point& start, const time_point& end) {
407  return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
408 }
409 
410 template <typename Op>
411 long time(const std::string& name, const Op& operation) {
412  std::cout << "\t" << std::setw(30) << std::setiosflags(std::ios::left) << name
413  << std::resetiosflags(std::ios::left) << " ... " << std::flush;
414  auto a = now();
415  operation();
416  auto b = now();
417  auto time = duration(a, b);
418  std::cout << " done [" << std::setw(5) << time << "ms]\n";
419  return time;
420 }
421 
422 template <typename C>
423 struct reserver {
424  void operator()(C&, unsigned) const {
425  // default: no action
426  }
427 };
428 
429 template <typename A, typename B, typename C, typename D>
430 struct reserver<std::unordered_set<A, B, C, D>> {
431  void operator()(std::unordered_set<A, B, C, D>& set, unsigned size) const {
432  set.reserve(size);
433  }
434 };
435 
436 #define checkPerformance(set_type, name, in, out) \
437  { \
438  std::cout << "Testing: " << name << " ..\n"; \
439  set_type set; \
440  time("filling set", [&]() { \
441  reserver<set_type>()(set, in.size()); \
442  for (const auto& cur : in) { \
443  set.insert(cur); \
444  } \
445  }); \
446  int counter = 0; \
447  time("full scan", [&]() { \
448  for (auto it = set.begin(); it != set.end(); ++it) { \
449  counter++; \
450  } \
451  }); \
452  EXPECT_EQ((size_t)counter, set.size()); \
453  bool allPresent = true; \
454  time("membership in", [&]() { \
455  for (const auto& cur : in) { \
456  allPresent = (set.find(cur) != set.end()) && allPresent; \
457  } \
458  }); \
459  EXPECT_TRUE(allPresent); \
460  bool allMissing = true; \
461  time("membership out", [&]() { \
462  for (const auto& cur : out) { \
463  allMissing = (set.find(cur) == set.end()) && allMissing; \
464  } \
465  }); \
466  EXPECT_TRUE(allMissing); \
467  bool allFound = true; \
468  time("lower_boundaries", [&]() { \
469  for (const auto& cur : in) { \
470  allFound = (set.lower_bound(cur) == set.find(cur)) && allFound; \
471  } \
472  }); \
473  EXPECT_TRUE(allFound); \
474  allFound = true; \
475  time("upper_boundaries", [&]() { \
476  for (const auto& cur : in) { \
477  allFound = (set.upper_bound(cur) == (++set.find(cur))) && allFound; \
478  } \
479  }); \
480  EXPECT_TRUE(allFound); \
481  allFound = true; \
482  time("boundaries on missing elements", [&]() { \
483  for (const auto& cur : out) { \
484  allFound = (set.lower_bound(cur) == set.upper_bound(cur)) && allFound; \
485  } \
486  }); \
487  EXPECT_TRUE(allFound); \
488  set_type a(in.begin(), in.end()); \
489  set_type b(out.begin(), out.end()); \
490  time("merge two sets", [&]() { a.insert(b.begin(), b.end()); }); \
491  std::cout << "\tDone!\n\n"; \
492  }
493 
494 TEST(Performance, Basic) {
495  int N = 1 << 18;
496 
497  // get list of tuples to be inserted
498  std::cout << "Generating Test-Data ...\n";
499  std::vector<Entry> in;
500  std::vector<Entry> out;
501  time("generating data", [&]() {
502  auto data = getData(2 * N);
503  for (std::size_t i = 0; i < data.size(); i += 2) {
504  in.push_back(data[i]);
505  out.push_back(data[i + 1]);
506  }
507  });
508 
509  using t1 = std::set<Entry>;
510  checkPerformance(t1, " -- warm up -- ", in, out);
511  using t2 = btree_multiset<Entry, detail::comparator<Entry>, std::allocator<Entry>, 256,
513  checkPerformance(t2, "souffle btree_multiset - 256 - linear", in, out);
514  using t3 = btree_multiset<Entry, detail::comparator<Entry>, std::allocator<Entry>, 256,
516  checkPerformance(t3, "souffle btree_multiset - 256 - binary", in, out);
517 }
518 
519 TEST(Performance, Load) {
520  int N = 1 << 20;
521 
522  std::vector<int> data;
523  for (int i = 0; i < N; i++) {
524  data.push_back(i);
525  }
526 
527  // take time for conventional load
528  time("conventional load", [&]() { btree_multiset<int> t(data.begin(), data.end()); });
529 
530  // take time for structured load
531  time("bulk-load", [&]() { auto t = btree_multiset<int>::load(data.begin(), data.end()); });
532 }
533 } // namespace test
534 } // end namespace souffle
BTree.h
TCB_SPAN_NAMESPACE_NAME::detail::size
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.h:198
DEBUG
#define DEBUG(Kind)
EXPECT_TRUE
#define EXPECT_TRUE(a)
Definition: test.h:189
souffle::btree_multiset
A b-tree based multi-set implementation.
Definition: BTree.h:2303
souffle::test::now
time_point now()
Definition: btree_multiset_test.cpp:402
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition: test.h:191
e
l j a showGridBackground &&c b raw series this eventEmitter e
Definition: htmlJsChartistMin.h:15
souffle::test::time_point
std::chrono::high_resolution_clock::time_point time_point
Definition: btree_multiset_test.cpp:400
j
var j
Definition: htmlJsChartistMin.h:15
souffle::detail::btree::insert
bool insert(const Key &k)
Inserts the given key into this tree.
Definition: BTree.h:1328
souffle::btree_multiset::load
static btree_multiset load(const Iter &a, const Iter &b)
Definition: BTree.h:2346
souffle::test::reserver< std::unordered_set< A, B, C, D > >::operator()
void operator()(std::unordered_set< A, B, C, D > &set, unsigned size) const
Definition: btree_multiset_test.cpp:431
i
size_t i
Definition: json11.h:663
souffle::detail::linear_search
A linear search strategy for looking up keys in b-tree nodes.
Definition: BTree.h:87
EXPECT_LT
#define EXPECT_LT(a, b)
Definition: test.h:199
test.h
souffle::test::reserver::operator()
void operator()(C &, unsigned) const
Definition: btree_multiset_test.cpp:424
k
var k
Definition: htmlJsChartistMin.h:15
EXPECT_NE
#define EXPECT_NE(a, b)
Definition: test.h:195
souffle::test::duration
long duration(const time_point &start, const time_point &end)
Definition: btree_multiset_test.cpp:406
souffle::detail::binary_search
A binary search strategy for looking up keys in b-tree nodes.
Definition: BTree.h:141
souffle::test::getData
std::vector< Entry > getData(unsigned numEntries)
Definition: btree_multiset_test.cpp:387
souffle::test::time
long time(const std::string &name, const Op &operation)
Definition: btree_multiset_test.cpp:411
b
l j a showGridBackground &&c b raw series this eventEmitter b
Definition: htmlJsChartistMin.h:15
std
Definition: Brie.h:3053
souffle::test::reserver
Definition: btree_multiset_test.cpp:423
std::operator<<
ostream & operator<<(ostream &out, const array< T, E > &v)
Enables the generic printing of arrays assuming their element types are printable.
Definition: StreamUtil.h:233
souffle::test::Entry
std::tuple< int, int > Entry
Definition: btree_multiset_test.cpp:385
souffle
Definition: AggregateOp.h:25
TCB_SPAN_NAMESPACE_NAME::detail::data
constexpr auto data(C &c) -> decltype(c.data())
Definition: span.h:210
checkPerformance
#define checkPerformance(set_type, name, in, out)
Definition: btree_multiset_test.cpp:436
EXPECT_FALSE
#define EXPECT_FALSE(a)
Definition: test.h:190
souffle::test::TEST
TEST(EqRelTest, Scoping)
Definition: binary_relation_test.cpp:51