8 #ifndef META_OCEAN_MATH_CLUSTERING_K_MEANS_H
9 #define META_OCEAN_MATH_CLUSTERING_K_MEANS_H
33 template <
bool tUseIndices>
43 template <
typename T,
size_t tDimension>
57 template <
typename T,
size_t tDimension>
92 Data(
const Observation* observations,
const size_t numberObservations,
const bool copyObservations =
false);
140 explicit inline operator bool()
const;
151 size_t numberObservations_ = 0;
161 template <
typename T,
size_t tDimension>
197 Data(
const Observation** observationPointers,
const size_t numberObservations,
const bool copyPointers =
false);
245 explicit inline operator bool()
const;
/// The number of observation elements of this data object; zero for a default-constructed (empty) object.
size_t numberObservations_ = 0;
268 template <
typename T,
size_t tDimension,
typename TSum = T,
typename TSquareDistance = T,
bool tUseIndices = true>
310 friend class ClusteringKMeans<T, tDimension, TSum, TSquareDistance, tUseIndices>;
311 friend class std::allocator<
Cluster>;
540 explicit inline operator bool()
const;
600 template <
typename T,
size_t tDimension>
602 copyObservations_(copyObservations ? numberObservations : 0),
603 observations_(nullptr),
604 numberObservations_(0)
608 if (copyObservations)
624 template <
typename T,
size_t tDimension>
626 copyObservations_(std::move(data.copyObservations_)),
627 observations_(data.observations_),
628 numberObservations_(data.numberObservations_)
630 data.observations_ =
nullptr;
631 data.numberObservations_ = 0;
635 template <
typename T,
size_t tDimension>
638 ocean_assert(isValidDataIndex(dataIndex));
639 return observations_[dataIndex];
643 template <
typename T,
size_t tDimension>
646 return numberObservations_;
650 template <
typename T,
size_t tDimension>
653 return dataIndex < numberObservations_;
657 template <
typename T,
size_t tDimension>
660 ocean_assert(isValidDataIndex(dataIndex));
661 return observations_[dataIndex];
665 template <
typename T,
size_t tDimension>
670 copyObservations_ = std::move(data.copyObservations_);
671 observations_ = data.observations_;
672 numberObservations_ = data.numberObservations_;
674 data.observations_ =
nullptr;
675 data.numberObservations_ = 0;
682 template <
typename T,
size_t tDimension>
685 return observations_ !=
nullptr;
689 template <
typename T,
size_t tDimension>
691 copyObservationPointers_(copyPointers ? numberObservations : 0),
692 observationPointers_(nullptr),
693 numberObservations_(0)
713 template <
typename T,
size_t tDimension>
715 copyObservationPointers_(std::move(data.copyObservationPointers_)),
716 observationPointers_(data.observationPointers_),
717 numberObservations_(data.numberObservations_)
719 data.observationPointers_ =
nullptr;
720 data.numberObservations_ = 0;
724 template <
typename T,
size_t tDimension>
727 ocean_assert(isValidDataIndex(dataIndex));
728 return *(observationPointers_[dataIndex]);
732 template <
typename T,
size_t tDimension>
735 return numberObservations_;
739 template <
typename T,
size_t tDimension>
742 return dataIndex < numberObservations_;
746 template <
typename T,
size_t tDimension>
749 ocean_assert(isValidDataIndex(dataIndex));
750 return *(observationPointers_[dataIndex]);
754 template <
typename T,
size_t tDimension>
759 copyObservationPointers_ = std::move(data.copyObservationPointers_);
760 observationPointers_ = data.observationPointers_;
761 numberObservations_ = data.numberObservations_;
763 data.observationPointers_ =
nullptr;
764 data.numberObservations_ = 0;
771 template <
typename T,
size_t tDimension>
774 return observationPointers_ !=
nullptr;
777 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
779 owner_(cluster.owner_),
780 mean_(std::move(cluster.mean_)),
781 dataIndices_(std::move(cluster.dataIndices_))
783 cluster.owner_ =
nullptr;
786 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
794 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
795 inline ClusteringKMeans<T, tDimension, TSum, TSquareDistance, tUseIndices>::Cluster::Cluster(
const ClusteringKMeans<T, tDimension, TSum, TSquareDistance, tUseIndices>& owner,
const Observation& mean,
const DataIndices& dataIndices) :
798 dataIndices_(dataIndices)
800 static_assert(tDimension != 0,
"Invalid observation dimension!");
803 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
804 inline ClusteringKMeans<T, tDimension, TSum, TSquareDistance, tUseIndices>::Cluster::Cluster(
const ClusteringKMeans<T, tDimension, TSum, TSquareDistance, tUseIndices>& owner,
const Observation& mean,
DataIndices&& dataIndices) :
807 dataIndices_(dataIndices)
809 static_assert(tDimension != 0,
"Invalid observation dimension!");
812 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
815 TSquareDistance result = TSquareDistance(0);
817 for (
size_t n = 0; n < tDimension; ++n)
819 result += (mean_[n] - observation[n]) * (mean_[n] - observation[n]);
825 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
828 ocean_assert(owner_);
830 if (dataIndices_.empty())
832 return TSquareDistance(0);
835 const Data& data = owner_->data_;
836 TSquareDistance result = TSquareDistance(0);
838 for (
typename DataIndices::const_iterator i = dataIndices_.begin(); i != dataIndices_.end(); ++i)
840 const TSquareDistance localDistance =
sqrDistance(data[*i]);
842 if (localDistance > result)
844 result = localDistance;
846 if (observationIndex)
848 *observationIndex = *i;
856 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
859 ocean_assert(owner_);
861 if (dataIndices_.empty())
863 return TSquareDistance(0);
866 const Data& data = owner_->data_;
867 TSquareDistance result = TSquareDistance(0);
869 for (
typename DataIndices::const_iterator i = dataIndices_.begin(); i != dataIndices_.end(); ++i)
874 ocean_assert(dataIndices_.size() != 0);
875 return result / TSquareDistance(dataIndices_.size());
878 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
884 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
890 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
896 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
899 if (
this != &cluster)
901 mean_ = std::move(cluster.mean_);
902 dataIndices_ = std::move(cluster.dataIndices_);
903 owner_ = cluster.owner_;
905 cluster.owner_ =
nullptr;
911 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
914 return dataIndices_.size() < cluster.
dataIndices_.size();
917 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
920 if (dataIndices_.empty())
922 for (
size_t d = 0; d < tDimension; ++d)
930 ocean_assert(owner_);
931 const Data& data = owner_->data_;
933 for (
typename DataIndices::const_iterator i = dataIndices_.begin(); i != dataIndices_.end(); ++i)
935 ocean_assert(*i < data.numberObservations());
937 for (
size_t d = 0; d < tDimension; ++d)
939 sumObservation[d] += data[*i][d];
943 const TSum count = TSum(dataIndices_.size());
945 for (
size_t d = 0; d < tDimension; ++d)
947 mean_[d] = T(sumObservation[d] / count);
951 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
954 static_assert(tDimension != 0,
"Invalid observation dimension!");
957 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
959 data_(std::move(clustering.data_)),
960 clusters_(std::move(clustering.clusters_))
962 static_assert(tDimension != 0,
"Invalid observation dimension!");
965 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
969 static_assert(tDimension != 0,
"Invalid observation dimension!");
971 ocean_assert(
data_ &&
"The data element is invalid!");
974 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
976 data_(std::move(data))
978 static_assert(tDimension != 0,
"Invalid observation dimension!");
980 ocean_assert(
data_ &&
"The data element is invalid!");
983 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
986 ocean_assert(clusters_.empty());
988 if (strategy == IS_LARGEST_DISTANCE)
990 determineInitialClustersLargestDistance(numberClusters);
994 ocean_assert(strategy == IS_RANDOM);
995 determineInitialClustersRandom(numberClusters);
998 ocean_assert(iterations >= 1);
999 applyOptimizationIteration(worker);
1001 for (
size_t i = 0; i < iterations; i++)
1003 applyOptimizationIteration(worker);
1007 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1010 ocean_assert(data_);
1011 ocean_assert(clusters_.empty());
1014 const size_t firstDataIndex = smallestObservation(data_);
1015 ocean_assert(firstDataIndex !=
size_t(-1));
1017 clusters_.push_back(
Cluster(*
this, data_[firstDataIndex], std::move(createIndices<size_t>(data_.numberObservations(), 0))));
1019 while (maximalClusters == 0 || clusters_.size() < maximalClusters)
1021 if (!addCluster(iterations, maximalSqrDistance, worker))
1028 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1031 size_t maximalIndex = size_t(-1);
1032 TSquareDistance maximalDistance = TSquareDistance(0);
1034 for (
size_t c = 0; c < clusters_.size(); ++c)
1036 size_t localIndex = size_t(-1);
1037 const TSquareDistance localDistance = clusters_[c].maximalSqrDistance(&localIndex);
1039 if (localDistance > maximalDistance)
1041 maximalDistance = localDistance;
1042 maximalIndex = localIndex;
1046 if (maximalIndex !=
size_t(-1) && maximalDistance >=
sqrDistance)
1048 clusters_.push_back(
Cluster(*
this, data_[maximalIndex]));
1050 ocean_assert(iterations >= 1);
1051 applyOptimizationIteration(worker);
1053 for (
size_t n = 1; n < iterations; ++n)
1055 applyOptimizationIteration(worker);
1064 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1067 ocean_assert(!clusters_.empty());
1069 if (clusters_.size() <= 1)
1075 size_t minimalCluster = 0;
1078 for (
size_t c = 0; c < clusters_.size(); ++c)
1080 const TSquareDistance localDistance = clusters_[c].maximalSqrDistance();
1082 if (localDistance < minimalDistance)
1084 minimalDistance = localDistance;
1089 Clusters tmpClusters(std::move(clusters_));
1090 ocean_assert(clusters_.empty());
1092 ocean_assert(tmpClusters.size() >= 1);
1093 clusters_.reserve(tmpClusters.size() - 1);
1095 for (
size_t c = 0; c < tmpClusters.size(); ++c)
1097 if (c != minimalCluster)
1099 clusters_.push_back(std::move(tmpClusters[c]));
1103 ocean_assert(!clusters_.empty());
1105 ocean_assert(iterations >= 1);
1106 applyOptimizationIteration(worker);
1108 for (
size_t n = 1; n < iterations; ++n)
1110 applyOptimizationIteration(worker);
1115 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1119 size_t minimalIndex = size_t(-1);
1121 for (
size_t n = 0; n < clusters_.size(); ++n)
1123 const TSquareDistance localDistance = clusters_[n].sqrDistance(observation);
1125 if (localDistance < minimalDistance)
1127 minimalDistance = localDistance;
1132 return minimalIndex;
1135 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1141 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1144 std::sort(clusters_.rbegin(), clusters_.rend());
1147 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1150 TSquareDistance maximalDistance = TSquareDistance(0);
1152 for (
typename Clusters::const_iterator i = clusters_.begin(); i != clusters_.end(); ++i)
1154 maximalDistance = max(maximalDistance, i->maximalSqrDistance());
1157 return maximalDistance;
1160 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1163 ocean_assert(data_.numberObservations() != 0);
1164 ocean_assert(clusters_.empty());
1167 const DataIndex firstDataIndex = smallestObservation(data_);
1168 ocean_assert(firstDataIndex !=
DataIndex(-1));
1170 clusters_.push_back(
Cluster(*
this, data_[firstDataIndex]));
1172 while (clusters_.size() < numberClusters)
1174 TSquareDistance largestDistance = TSquareDistance(0);
1175 size_t largestIndex = size_t(-1);
1177 for (
size_t o = 0; o < data_.numberObservations(); ++o)
1183 for (
size_t c = 0; c < clusters_.size(); ++c)
1185 localDistance = min(localDistance, clusters_[c].
sqrDistance(observation));
1188 if (localDistance > largestDistance)
1190 largestDistance = localDistance;
1196 if (largestIndex ==
size_t(-1))
1201 ocean_assert(largestDistance != TSquareDistance(0));
1204 for (
size_t c = 0; c < clusters_.size(); ++c)
1206 ocean_assert(clusters_[c].mean() != data_[largestIndex]);
1210 clusters_.push_back(
Cluster(*
this, data_[largestIndex]));
1213 ocean_assert(!clusters_.empty());
1215 for (
size_t c = 0; c < clusters_.size(); ++c)
1217 ocean_assert(clusters_[c].dataIndices().empty());
1218 clusters_[c].dataIndices().reserve(data_.numberObservations() * 2 / clusters_.size());
1222 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1225 ocean_assert(data_.numberObservations() != 0);
1226 ocean_assert(clusters_.empty());
1231 const size_t firstDataIndex = smallestObservation(data_);
1232 ocean_assert(firstDataIndex !=
size_t(-1));
1234 clusters_.push_back(
Cluster(*
this, data_[firstDataIndex]));
1236 size_t iterations = 0;
1237 while (clusters_.size() < numberClusters && iterations++ < numberClusters * 100)
1239 TSquareDistance largestDistance = 0u;
1240 size_t largestIndex = size_t(-1);
1242 for (
size_t n = 0; n < max<size_t>(1, data_.numberObservations() / 128); ++n)
1244 const size_t index = random64 ? RandomI::random64() % data_.numberObservations() : RandomI::random32() % (
unsigned int)data_.numberObservations();
1249 for (
size_t c = 0; c < clusters_.size(); ++c)
1251 smallestDistance = min(smallestDistance, clusters_[c].
sqrDistance(candidate));
1254 if (smallestDistance > largestDistance)
1256 largestDistance = smallestDistance;
1257 largestIndex = index;
1262 if (largestIndex ==
size_t(-1))
1267 ocean_assert(largestDistance != TSquareDistance(0));
1270 for (
size_t c = 0; c < clusters_.size(); ++c)
1272 ocean_assert(clusters_[c].mean() != data_[largestIndex]);
1276 clusters_.push_back(
Cluster(*
this, data_[largestIndex]));
1279 ocean_assert(!clusters_.empty());
1281 for (
size_t c = 0; c < clusters_.size(); ++c)
1283 ocean_assert(clusters_[c].dataIndices().empty());
1284 clusters_[c].dataIndices().reserve(data_.numberObservations() * 2 / clusters_.size());
1288 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1291 ocean_assert(!clusters_.empty());
1294 for (
size_t c = 0; c < clusters_.size(); ++c)
1296 clusters_[c].dataIndices().clear();
1300 for (
size_t o = 0; o < data_.numberObservations(); ++o)
1305 size_t bestCluster = size_t(-1);
1307 for (
size_t c = 0; c < clusters_.size(); ++c)
1309 const TSquareDistance localDistance = clusters_[c].sqrDistance(observation);
1311 if (localDistance < bestDistance)
1313 bestDistance = localDistance;
1318 ocean_assert(bestCluster !=
size_t(-1));
1320 clusters_[bestCluster].dataIndices().push_back(o);
1324 for (
size_t c = 0; c < clusters_.size(); ++c)
1326 clusters_[c].updateMean();
1330 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1333 if (worker ==
nullptr)
1335 applyOptimizationIteration();
1339 ocean_assert(!clusters_.empty());
1342 for (
size_t c = 0; c < clusters_.size(); ++c)
1344 clusters_[c].dataIndices().clear();
1348 worker->
executeFunction(Worker::Function::create(*
this, &
ClusteringKMeans<T, tDimension, TSum, TSquareDistance, tUseIndices>::applyOptimizationIterationSubset, &lock, 0u, 0u), 0u, (
unsigned int)data_.numberObservations(), 1u, 2u);
1351 for (
size_t c = 0; c < clusters_.size(); ++c)
1353 clusters_[c].updateMean();
1358 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1361 ocean_assert(!clusters_.empty());
1363 std::vector<DataIndices> localClusters(clusters_.size());
1366 for (
size_t o = firstObservation; o < firstObservation + numberObservations; ++o)
1371 size_t bestCluster = size_t(-1);
1373 for (
size_t c = 0; c < clusters_.size(); ++c)
1375 const TSquareDistance localDistance = clusters_[c].sqrDistance(observation);
1377 if (localDistance < bestDistance)
1379 bestDistance = localDistance;
1384 ocean_assert(bestCluster !=
size_t(-1));
1386 localClusters[bestCluster].push_back(o);
1391 for (
size_t c = 0; c < localClusters.size(); ++c)
1393 clusters_[c].dataIndices().insert(clusters_[c].dataIndices().end(), localClusters[c].begin(), localClusters[c].end());
1397 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1403 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1409 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1415 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1418 if (
this != &clustering)
1420 data_ = std::move(clustering.data_);
1421 clusters_ = std::move(clustering.clusters_);
1427 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1435 for (
size_t o = 0; o < data.numberObservations(); ++o)
1437 const TSquareDistance localDistance =
sqrDistance(data[o]);
1439 if (localDistance < smallestDistance)
1441 smallestDistance = localDistance;
1446 return smallestIndex;
1449 template <
typename T,
size_t tDimension,
typename TSum,
typename TSquareDistance,
bool tUseIndices>
1452 TSquareDistance result = TSquareDistance(0);
1454 for (
size_t n = 0; n < tDimension; ++n)
1456 result += observation[n] * observation[n];
This class implements the abstract data object which will be specialized for both data modes toggled ...
Definition: ClusteringKMeans.h:45
size_t DataIndex
Definition of an index that addresses one specific observation element in the data object that stores...
Definition: ClusteringKMeans.h:70
Data< T, tDimension > & operator=(Data< T, tDimension > &&data) noexcept
Move operator.
size_t numberObservations() const
Returns the number of observations that are stored by this data object.
size_t numberObservations_
The number of observation elements of this data object.
Definition: ClusteringKMeans.h:151
const Observation & operator[](const DataIndex &dataIndex) const
Returns one specific observation of this data object specified by the data-index of this observation.
std::vector< DataIndex > DataIndices
Definition of a vector holding indices to the data object.
Definition: ClusteringKMeans.h:75
Data(const Observation *observations, const size_t numberObservations, const bool copyObservations=false)
Creates a new data object by observations lying in a joined memory block as array.
const Observation ** observationPointers_
The observation pointers of this data object.
Definition: ClusteringKMeans.h:253
std::vector< const Observation * > copyObservationPointers_
The optional observation pointers that are stored as copy.
Definition: ClusteringKMeans.h:250
const Observation * observations_
The observation objects of this data object.
Definition: ClusteringKMeans.h:148
const Observation & observation(const DataIndex &dataIndex) const
Returns one specific observation of this data object specified by the data-index of this observation.
Data(Data< T, tDimension > &&data) noexcept
Move constructor for a data object.
std::vector< Observation > copyObservations_
The optional observations that are stored as copy.
Definition: ClusteringKMeans.h:145
bool isValidDataIndex(const DataIndex &dataIndex) const
Returns whether a given data-index is valid and has a corresponding observation stored in this data o...
Data()=default
Creates a new empty data object.
Data(const Observation **observationPointers, const size_t numberObservations, const bool copyPointers=false)
Creates a new data object by observations lying at individual memory positions.
StaticBuffer< T, tDimension > Observation
Definition of an observation object.
Definition: ClusteringKMeans.h:65
This class implements the base class for all classes providing clustering algorithms.
Definition: ClusteringKMeans.h:35
This class implements one cluster that holds the mean values of all observations belonging to this cl...
Definition: ClusteringKMeans.h:308
TSquareDistance maximalSqrDistance(DataIndex *observationIndex=nullptr) const
Calculates the maximal square distance between the mean observation value of this cluster and all obs...
Definition: ClusteringKMeans.h:826
TSquareDistance sqrDistance(const Observation &observation) const
Returns the square distance between a given observation and this cluster (the mean observation value ...
Definition: ClusteringKMeans.h:813
DataIndices dataIndices_
The data indices of all observations that belong to this cluster.
Definition: ClusteringKMeans.h:412
Observation mean_
The mean observation value of this cluster.
Definition: ClusteringKMeans.h:409
const ClusteringKMeans< T, tDimension, TSum, TSquareDistance, tUseIndices > * owner_
The owner of this cluster.
Definition: ClusteringKMeans.h:406
Cluster & operator=(Cluster &&cluster) noexcept
Move operator moving a cluster object to this object.
Definition: ClusteringKMeans.h:897
Cluster(Cluster &&cluster) noexcept
Move constructor for another cluster object.
Definition: ClusteringKMeans.h:778
const DataIndices & dataIndices() const
Returns the indices of the observations that belong to this cluster.
Definition: ClusteringKMeans.h:885
const Observation & mean() const
Returns the mean observation value of this cluster.
Definition: ClusteringKMeans.h:879
bool operator<(const Cluster &cluster) const
Returns whether the left cluster has fewer elements than the right cluster.
Definition: ClusteringKMeans.h:912
void updateMean()
Updates the mean observation value of this cluster by application of the stored indices of all observ...
Definition: ClusteringKMeans.h:918
TSquareDistance averageSqrDistance() const
Calculates the average square distance between the mean observation value of this cluster and all obs...
Definition: ClusteringKMeans.h:857
This class implements a k-means clustering algorithm.
Definition: ClusteringKMeans.h:270
void clear()
Clears all determined clusters; the registered data information remains untouched.
Definition: ClusteringKMeans.h:1398
void applyOptimizationIteration(Worker *worker)
Explicitly applies one further optimization iteration for an existing set of clusters.
Definition: ClusteringKMeans.h:1331
ClusteringKMeans(ClusteringKMeans &&clustering) noexcept
Move constructor.
Definition: ClusteringKMeans.h:958
std::vector< Cluster > Clusters
Definition of a vector holding cluster objects.
Definition: ClusteringKMeans.h:418
const Clusters & clusters() const
Returns the clusters of this k-means clustering object.
Definition: ClusteringKMeans.h:1136
Data data_
The data that stores the observations of this clustering object, either with index-access or pointer-...
Definition: ClusteringKMeans.h:593
void applyOptimizationIterationSubset(Lock *lock, const unsigned int firstObservation, const unsigned int numberObservations)
Explicitly applies one further optimization iteration for an existing set of clusters.
Definition: ClusteringKMeans.h:1359
ClusteringKMeans< T, tDimension, TSum, TSquareDistance, tUseIndices > & operator=(ClusteringKMeans &&clustering)
Move operator.
Definition: ClusteringKMeans.h:1416
ClusteringKMeans()
Creates an empty k-means object.
Definition: ClusteringKMeans.h:952
Data::DataIndices DataIndices
(Re-)Definition of a vector holding (size_t) indices.
Definition: ClusteringKMeans.h:297
ClusteringKMeans(Data &&data)
Creates a new k-means object by a given data object.
Definition: ClusteringKMeans.h:975
Data::DataIndex DataIndex
(Re-)Definition of an index that addresses one specific observation element in the data object that s...
Definition: ClusteringKMeans.h:292
static DataIndex smallestObservation(const Data &data)
Determines the smallest observation (euclidean distance to origin) from a set of observations.
Definition: ClusteringKMeans.h:1428
static TSquareDistance sqrDistance(const Observation &observation)
Returns the square distance between an observation and the origin.
Definition: ClusteringKMeans.h:1450
void determineClustersByDistance(const TSquareDistance maximalSqrDistance, size_t maximalClusters=0, const size_t iterations=5, Worker *worker=nullptr)
Determines the clusters for this object, ensure that this object has been initialized with a valid se...
Definition: ClusteringKMeans.h:1008
void determineClustersByNumber(const size_t numberClusters, const InitializationStrategy strategy=IS_LARGEST_DISTANCE, const size_t iterations=5, Worker *worker=nullptr)
Determines the clusters for this object, ensure that this object has been initialized with a valid se...
Definition: ClusteringKMeans.h:984
Data::Observation Observation
(Re-)Definition of an observation object.
Definition: ClusteringKMeans.h:302
bool addCluster(const size_t iterations=5, TSquareDistance sqrDistance=TSquareDistance(0), Worker *worker=nullptr)
Adds a new cluster to this object.
Definition: ClusteringKMeans.h:1029
bool isValid() const
Returns whether this object holds a valid set of observations.
Definition: ClusteringKMeans.h:1404
void determineInitialClustersRandom(const size_t numberClusters)
Determines the initial clusters for this object with the IS_RANDOM strategy.
Definition: ClusteringKMeans.h:1223
void removeCluster(const size_t iterations=5, Worker *worker=nullptr)
Removes one cluster from this object.
Definition: ClusteringKMeans.h:1065
Clusters clusters_
The current clusters of this object.
Definition: ClusteringKMeans.h:596
InitializationStrategy
Definition of individual initialization strategies.
Definition: ClusteringKMeans.h:277
@ IS_LARGEST_DISTANCE
The first cluster is determined by selection of the (euclidean) smallest observation,...
Definition: ClusteringKMeans.h:279
@ IS_RANDOM
All clusters are selected randomly.
Definition: ClusteringKMeans.h:281
Clustering< tUseIndices >::template Data< T, tDimension > Data
(Re-)Definition of a data object providing the data which will be clustered.
Definition: ClusteringKMeans.h:287
size_t findCluster(const Observation &observation)
Finds a best matching cluster for a given independent observation.
Definition: ClusteringKMeans.h:1116
void determineInitialClustersLargestDistance(const size_t numberClusters)
Determines the initial clusters for this object with the IS_LARGEST_DISTANCE strategy.
Definition: ClusteringKMeans.h:1161
ClusteringKMeans(const Data &data)
Creates a new k-means object by a given data object.
Definition: ClusteringKMeans.h:966
void sortClusters()
Sorts the clusters regarding their number of elements.
Definition: ClusteringKMeans.h:1142
TSquareDistance maximalSqrDistance() const
Calculates the maximal square distance between the mean observation value of each cluster and all ob...
Definition: ClusteringKMeans.h:1148
void applyOptimizationIteration()
Explicitly applies one further optimization iteration for an existing set of clusters.
Definition: ClusteringKMeans.h:1289
This class implements a recursive lock object.
Definition: Lock.h:31
This class provides basic numeric functionalities.
Definition: Numeric.h:57
This class implements an optional recursive scoped lock object locking the lock object only if it's d...
Definition: Lock.h:325
This class implements a static buffer that has a fixed capacity.
Definition: StaticBuffer.h:24
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
unsigned int sqrDistance(const char first, const char second)
Returns the square distance between two values.
Definition: base/Utilities.h:1089
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15