Ocean
HashSet.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_BASE_HASH_SET_H
9 #define META_OCEAN_BASE_HASH_SET_H
10 
11 #include "ocean/base/Base.h"
12 
13 #include <vector>
14 
15 namespace Ocean
16 {
17 
18 /**
19  * This class implements a hash set.
20  * @tparam T The data type that is stored by the hash set
21  * @ingroup base
22  */
23 template <typename T>
24 class HashSet
25 {
26  protected:
27 
28  /**
29  * Definition of a pair combining a counter states and an object.
30  */
31  typedef typename std::pair<std::pair<size_t, size_t>, T> Element;
32 
33  /**
34  * Definition of a vector holding the set objects.
35  */
36  typedef std::vector<Element> Elements;
37 
38  /**
39  * Definition of a function pointer returning a hash set value.
40  */
41  typedef size_t (*ValueFunction)(const T& element);
42 
43  public:
44 
45  /**
46  * Copy constructor.
47  * @param hashSet The hash set to copy
48  */
49  inline HashSet(const HashSet<T>& hashSet);
50 
51  /**
52  * Move constructor.
53  * @param hashSet The hash set to move
54  */
55  inline HashSet(HashSet<T>&& hashSet) noexcept;
56 
57  /**
58  * Creates a new hash set object by a given capacity.
59  * @param capacity Maximal capacity the hash set will support
60  * @param function Hash function to be used
61  */
62  explicit HashSet(const size_t capacity, const ValueFunction& function = defaultHashFunction);
63 
64  /**
65  * Adds a new element to this hash set.
66  * @param element Element to be added
67  * @param oneOnly Adds the element if it does not exist already
68  * @param extendCapacity True, to extend the capacity if necessary
69  * @return True, if the element has been added
70  */
71  bool insert(const T& element, const bool oneOnly = true, const bool extendCapacity = true);
72 
73  /**
74  * Adds (moves) a new element to this hash set.
75  * @param element Element to be moved
76  * @param oneOnly Adds the element if it does not exist already
77  * @param extendCapacity True, to extend the capacity if necessary
78  * @return True, if the element has been added
79  */
80  bool insert(T&& element, const bool oneOnly = true, const bool extendCapacity = true);
81 
82  /**
83  * Removes an element from this hash set.
84  * @param element Element to be removed
85  * @return True, if succeeded
86  */
87  bool remove(const T& element);
88 
89  /**
90  * Returns whether this hash set holds a given element.
91  * @param element Element to be checked
92  * @return True, if succeeded
93  */
94  bool find(const T& element) const;
95 
96  /**
97  * Removes all elements from this has set.
98  */
99  void clear();
100 
101  /**
102  * Returns the number of elements this hash set currently holds.
103  * @return Number of elements
104  */
105  inline size_t size() const;
106 
107  /**
108  * Returns the capacity of this hash set.
109  * @return Maximal capacity this hash set supports
110  */
111  inline size_t capacity() const;
112 
113  /**
114  * Returns whether this hash set is empty.
115  * @return True, if so
116  */
117  inline bool isEmpty() const;
118 
119  /**
120  * Assign operator.
121  * @param hashSet The hash set to assign
122  * @return The reference to this object
123  */
124  inline HashSet<T>& operator=(const HashSet<T>& hashSet);
125 
126  /**
127  * Move operator.
128  * @param hashSet The hash set to move
129  * @return The reference to this object
130  */
131  inline HashSet<T>& operator=(HashSet<T>&& hashSet) noexcept;
132 
133  protected:
134 
135  /**
136  * Creates a new hash set by a given hash set.
137  * @param capacity The capacity of the new has set, with range [hashSet.size(), infinity)
138  * @param hashSet The hash set which defines the initial values of this hash set, will be moved
139  */
140  HashSet(size_t capacity, HashSet<T>&& hashSet);
141 
142  /**
143  * Default hash function for elements supporting an cast size_t cast.
144  * @param element Element to return the hash value for
145  * @return Resulting hash value
146  */
147  static inline size_t defaultHashFunction(const T& element);
148 
149  /**
150  * Returns whether this hash set is still consistent.
151  * @return True, if so
152  */
153  bool isConsistent() const;
154 
155  protected:
156 
157  /// Hash set elements.
159 
160  /// Number of elements this has set holds.
161  size_t setSize;
162 
163  /// Value function.
165 };
166 
167 template <typename T>
168 inline HashSet<T>::HashSet(const HashSet<T>& hashSet) :
169  setElements(hashSet.setElements),
170  setSize(hashSet.setSize),
171  setFunction(hashSet.setFunction)
172 {
173  // nothing to do here
174 }
175 
176 template <typename T>
177 inline HashSet<T>::HashSet(HashSet<T>&& hashSet) noexcept :
178  setElements(std::move(hashSet.setElements)),
179  setSize(hashSet.setSize),
180  setFunction(hashSet.setFunction)
181 {
182  hashSet.setSize = 0;
183 }
184 
185 template <typename T>
186 HashSet<T>::HashSet(const size_t capacity, const ValueFunction& function) :
187  setElements(capacity),
188  setSize(0),
189  setFunction(function)
190 {
191  ocean_assert(isConsistent());
192 }
193 
194 template <typename T>
195 HashSet<T>::HashSet(size_t capacity, HashSet<T>&& hashSet) :
196  setElements(capacity),
197  setSize(0),
198  setFunction(hashSet.setFunction)
199 {
200  ocean_assert(capacity >= hashSet.size());
201 
202  for (typename Elements::iterator i = hashSet.setElements.begin(); i != hashSet.setElements.end(); ++i)
203  if (i->first.first != 0)
204  {
205  T& element = i->second;
206  insert(std::move(element));
207  }
208 
209  ocean_assert(size() == hashSet.size());
210  ocean_assert(isConsistent());
211 
212  hashSet.clear();
213 }
214 
215 template <typename T>
216 bool HashSet<T>::insert(const T& element, const bool oneOnly, const bool extendCapacity)
217 {
218  ocean_assert(setSize <= setElements.size());
219  ocean_assert(isConsistent());
220 
221  // check whether we have to extend the capacity of this hash set (we extend the set if more than 80% is occupied)
222  if (extendCapacity && setSize >= setElements.size() * 80 / 100)
223  {
224  *this = HashSet<T>(max(size_t(32), setElements.size() * 2), std::move(*this));
225  ocean_assert(setSize < setElements.size() * 80 / 100);
226  }
227 
228  if (setSize == setElements.size())
229  return false;
230 
231  if (oneOnly)
232  {
233  // linear search
234  for (size_t n = 0; n < setElements.size(); ++n)
235  {
236  const size_t value = (setFunction(element) + n) % setElements.size();
237 
238  // check whether the place is free
239  if (setElements[value].first.first == 0)
240  {
241  setElements[value].first.first = 1;
242  setElements[value].first.second = n;
243  setElements[value].second = element;
244 
245  ++setSize;
246  return true;
247  }
248  else if (setElements[value].second == element)
249  return false;
250  else
251  setElements[value].first.first++;
252  }
253  }
254  else
255  {
256  // linear search
257  for (size_t n = 0; n < setElements.size(); ++n)
258  {
259  const size_t value = (setFunction(element) + n) % setElements.size();
260 
261  // check whether the place is free
262  if (setElements[value].first.first == 0)
263  {
264  setElements[value].first.first = 1;
265  setElements[value].first.second = n;
266  setElements[value].second = element;
267 
268  ++setSize;
269  return true;
270  }
271  else
272  setElements[value].first.first++;
273  }
274  }
275 
276  ocean_assert(false && "This must never happen!");
277  return false;
278 }
279 
280 template <typename T>
281 bool HashSet<T>::insert(T&& element, const bool oneOnly, const bool extendCapacity)
282 {
283  ocean_assert(setSize <= setElements.size());
284  ocean_assert(isConsistent());
285 
286  // check whether we have to extend the capacity of this hash set (we extend the set if more than 80% is occupied)
287  if (extendCapacity && setSize >= setElements.size() * 80 / 100)
288  {
289  *this = HashSet<T>(max(size_t(32), setElements.size() * 2), std::move(*this));
290  ocean_assert(setSize < setElements.size() * 80 / 100);
291  }
292 
293  if (setSize == setElements.size())
294  return false;
295 
296  if (oneOnly)
297  {
298  // linear search
299  for (size_t n = 0; n < setElements.size(); ++n)
300  {
301  const size_t value = (setFunction(element) + n) % setElements.size();
302 
303  // check whether the place is free
304  if (setElements[value].first.first == 0)
305  {
306  setElements[value].first.first = 1;
307  setElements[value].first.second = n;
308  setElements[value].second = std::move(element);
309 
310  ++setSize;
311  return true;
312  }
313  else if (setElements[value].second == element)
314  return false;
315  else
316  setElements[value].first.first++;
317  }
318  }
319  else
320  {
321  // linear search
322  for (size_t n = 0; n < setElements.size(); ++n)
323  {
324  const size_t value = (setFunction(element) + n) % setElements.size();
325 
326  // check whether the place is free
327  if (setElements[value].first.first == 0)
328  {
329  setElements[value].first.first = 1;
330  setElements[value].first.second = n;
331  setElements[value].second = std::move(element);
332 
333  ++setSize;
334  return true;
335  }
336  else
337  setElements[value].first.first++;
338  }
339  }
340 
341  ocean_assert(false && "This must never happen!");
342  return false;
343 }
344 
345 template <typename T>
346 bool HashSet<T>::remove(const T& element)
347 {
348  ocean_assert(setSize <= setElements.size());
349  ocean_assert(isConsistent());
350 
351  // linear search
352  for (size_t n = 0; n < setElements.size(); ++n)
353  {
354  const size_t value = (setFunction(element) + n) % setElements.size();
355 
356  // check whether this place is free
357  if (setElements[value].first.first == 0)
358  return false;
359 
360  // check whether this place has no shift problem
361  if (setElements[value].first.first == 1)
362  {
363  if (setElements[value].second == element)
364  {
365  setElements[value].first.first = 0;
366  setElements[value].second = T();
367  --setSize;
368 
369  ocean_assert(isConsistent());
370 
371  return true;
372  }
373 
374  // the element is not the element to be removed, but also there is no other position for this element
375  return false;
376  }
377 
378  ocean_assert(setElements[value].first.first > 1);
379 
380  size_t elementOffset = 0u;
381 
382  if (setElements[value].second == element)
383  {
384  // the element exists however, the following elements needs a special handling
385 
386  size_t localValue = value;
387  size_t endLocation = setElements.size();
388 
389  while (true)
390  {
391  size_t lastOffset = 0;
392 
393  // find last element to swap
394  for (size_t i = 1; i < endLocation; ++i)
395  {
396  const size_t testValue = (localValue + i) % setElements.size();
397 
398  if (setElements[testValue].first.first >= 1)
399  {
400  if (setElements[testValue].first.second >= i)
401  lastOffset = i;
402  }
403 
404  if (setElements[testValue].first.first <= 1)
405  break;
406  }
407 
408  if (lastOffset == 0)
409  break;
410 
411  ocean_assert(endLocation >= lastOffset);
412  endLocation -= lastOffset;
413 
414  elementOffset += lastOffset;
415 
416  const size_t lastValue = (localValue + lastOffset) % setElements.size();
417 
418  // move the found element
419 
420  // setElements[localValue].first.first stays constant
421  setElements[localValue].first.second = setElements[lastValue].first.second - lastOffset;
422  setElements[localValue].second = setElements[lastValue].second;
423 
424  localValue = lastValue;
425 
426  if (setElements[lastValue].first.first == 1)
427  break;
428  }
429 
430  // decrease the used counter
431  const size_t startIndex = setFunction(element);
432 
433  for (size_t i = 0u; i < elementOffset + n; ++i)
434  setElements[(startIndex + i) % setElements.size()].first.first--;
435 
436  setElements[(value + elementOffset) % setElements.size()].first.first = 0;
437  setElements[(value + elementOffset) % setElements.size()].second = T();
438  --setSize;
439 
440  ocean_assert(isConsistent());
441 
442  return true;
443  }
444  }
445 
446  ocean_assert(false && "This must never happen!");
447  return false;
448 }
449 
450 template <typename T>
451 bool HashSet<T>::find(const T& element) const
452 {
453  ocean_assert(setSize <= setElements.size());
454  ocean_assert(isConsistent());
455 
456  // linear search
457  for (size_t n = 0; n < setElements.size(); ++n)
458  {
459  const size_t value = (setFunction(element) + n) % setElements.size();
460 
461  // check whether this place is free
462  if (setElements[value].first.first == 0)
463  return false;
464 
465  // check whether this element is equal to the given one
466  if (setElements[value].second == element)
467  return true;
468 
469  // check whether this place is not free but unique
470  if (setElements[value].first.first == 1)
471  return false;
472  }
473 
474  return false;
475 }
476 
477 template <typename T>
479 {
480  ocean_assert(isConsistent());
481 
482  for (typename Elements::iterator i = setElements.begin(); i != setElements.end(); ++i)
483  i->first.first = 0;
484 
485  setSize = 0;
486 
487  ocean_assert(isConsistent());
488 }
489 
490 template <typename T>
491 inline size_t HashSet<T>::size() const
492 {
493  return setSize;
494 }
495 
496 template <typename T>
497 inline size_t HashSet<T>::capacity() const
498 {
499  return setElements.size();
500 }
501 
502 template <typename T>
503 inline bool HashSet<T>::isEmpty() const
504 {
505  return setSize == 0;
506 }
507 
508 template <typename T>
510 {
511  if (this != &hashSet)
512  {
513  setElements = hashSet.setElements;
514  setSize = hashSet.setSize;
515  setFunction = hashSet.setFunction;
516  }
517 
518  return *this;
519 }
520 
521 template <typename T>
522 inline HashSet<T>& HashSet<T>::operator=(HashSet<T>&& hashSet) noexcept
523 {
524  if (this != &hashSet)
525  {
526  setElements = std::move(hashSet.setElements);
527  setSize = hashSet.setSize;
528  setFunction = hashSet.setFunction;
529 
530  hashSet.setSize = 0;
531  }
532 
533  return *this;
534 }
535 
536 template <typename T>
537 inline size_t HashSet<T>::defaultHashFunction(const T& element)
538 {
539  return size_t(element);
540 }
541 
542 template <typename T>
544 {
545  size_t count = 0;
546 
547  for (typename Elements::const_iterator i = setElements.begin(); i != setElements.end(); ++i)
548  if (i->first.first != 0)
549  ++count;
550 
551  return count == setSize;
552 }
553 
554 }
555 
556 #endif // META_OCEAN_BASE_HASH_SET_H
This class implements a hash set.
Definition: HashSet.h:25
ValueFunction setFunction
Value function.
Definition: HashSet.h:164
bool remove(const T &element)
Removes an element from this hash set.
Definition: HashSet.h:346
size_t capacity() const
Returns the capacity of this hash set.
Definition: HashSet.h:497
static size_t defaultHashFunction(const T &element)
Default hash function for elements supporting an cast size_t cast.
Definition: HashSet.h:537
void clear()
Removes all elements from this has set.
Definition: HashSet.h:478
HashSet(const HashSet< T > &hashSet)
Copy constructor.
Definition: HashSet.h:168
HashSet< T > & operator=(const HashSet< T > &hashSet)
Assign operator.
Definition: HashSet.h:509
size_t size() const
Returns the number of elements this hash set currently holds.
Definition: HashSet.h:491
size_t(* ValueFunction)(const T &element)
Definition of a function pointer returning a hash set value.
Definition: HashSet.h:41
size_t setSize
Number of elements this has set holds.
Definition: HashSet.h:161
std::vector< Element > Elements
Definition of a vector holding the set objects.
Definition: HashSet.h:36
bool isEmpty() const
Returns whether this hash set is empty.
Definition: HashSet.h:503
std::pair< std::pair< size_t, size_t >, T > Element
Definition of a pair combining a counter states and an object.
Definition: HashSet.h:31
bool insert(const T &element, const bool oneOnly=true, const bool extendCapacity=true)
Adds a new element to this hash set.
Definition: HashSet.h:216
Elements setElements
Hash set elements.
Definition: HashSet.h:158
bool find(const T &element) const
Returns whether this hash set holds a given element.
Definition: HashSet.h:451
bool isConsistent() const
Returns whether this hash set is still consistent.
Definition: HashSet.h:543
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15