K值聚类(一)
下面这段代码是 OpenCV-Python-Tutorial-中文版.pdf (P281)中的实现,
当前系列所有demo下载地址:
https://github.com/GaoRenBao/OpenCv4-Demo
https://gitee.com/fuckgrb/OpenCv4-Demo
不同编程语言对应的OpenCv版本以及开发环境信息如下:
语言 | OpenCv版本 | IDE |
C# | OpenCvSharp4.4.8.0.20230708 | Visual Studio 2022 |
C++ | OpenCv-4.5.5-vc14_vc15 | Visual Studio 2022 |
Python | OpenCv-Python (4.6.0.66) | PyCharm Community Edition 2022.1.3 |
详细介绍请查看PDF文档
C#版本代码如下:
代码如下:
using OpenCvSharp;
using System;
namespace demo
{
    /// <summary>
    /// Interactive k-means demo: repeatedly generates random 2-D point clouds,
    /// clusters them with <c>Cv2.Kmeans</c> and shows the coloured result in a window
    /// until the user presses ESC, 'q' or 'Q'.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Program entry point. Runs the generate / cluster / draw loop.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const int maxClusters = 5;
            var rng = new RNG(state: (ulong)DateTime.Now.Ticks);

            // One drawing colour per possible cluster label (at most maxClusters labels).
            // Loop-invariant, so it is built once instead of on every iteration.
            Scalar[] colors =
            {
                new Scalar(0, 0, 255),
                new Scalar(0, 255, 0),
                new Scalar(255, 100, 100),
                new Scalar(255, 0, 255),
                new Scalar(0, 255, 255)
            };

            while (true)
            {
                var clustersCount = rng.Uniform(a: 2, b: maxClusters + 1);
                var samplesCount = rng.Uniform(a: 1, b: 1001);

                // There cannot be more clusters than samples.
                clustersCount = Math.Min(clustersCount, samplesCount);

                // Mat wraps native memory; dispose every per-iteration Mat so the
                // endless loop does not leak unmanaged memory.
                using (var points = new Mat(rows: samplesCount, cols: 1, type: MatType.CV_32FC2))
                using (var img = new Mat(rows: 500, cols: 500, type: MatType.CV_8UC4, s: Scalar.All(0)))
                using (var labels = new Mat())
                using (var centers = new Mat(rows: clustersCount, cols: 1, type: points.Type()))
                {
                    // Generate random samples from a multi-gaussian distribution:
                    // each cluster fills its own contiguous slice of rows.
                    for (var k = 0; k < clustersCount; k++)
                    {
                        var startRow = k * samplesCount / clustersCount;
                        // The last cluster takes every remaining row so no sample is left unfilled.
                        var endRow = (k == clustersCount - 1)
                            ? samplesCount
                            : (k + 1) * samplesCount / clustersCount;

                        var center = new Point
                        {
                            X = rng.Uniform(a: 0, b: img.Cols),
                            Y = rng.Uniform(a: 0, b: img.Rows)
                        };

                        // RowRange returns a new Mat header that must be disposed as well.
                        using (var pointChunk = points.RowRange(startRow: startRow, endRow: endRow))
                        {
                            rng.Fill(
                                mat: pointChunk,
                                distType: DistributionType.Normal,
                                a: new Scalar(center.X, center.Y),
                                b: new Scalar(img.Cols * 0.05f, img.Rows * 0.05f));
                        }
                    }

                    // Shuffle so that samples of the same cluster are no longer stored contiguously.
                    Cv2.RandShuffle(dst: points, iterFactor: 1, ref rng);

                    Cv2.Kmeans(
                        data: points,
                        k: clustersCount,
                        bestLabels: labels,
                        criteria: new TermCriteria(CriteriaTypes.Eps | CriteriaTypes.MaxIter, 10, 1.0),
                        attempts: 3,
                        flags: KMeansFlags.PpCenters,
                        centers: centers);

                    // Draw every sample in the colour of the cluster it was assigned to.
                    for (var i = 0; i < samplesCount; i++)
                    {
                        var clusterIdx = labels.At<int>(i);
                        Point2f ipt = points.At<Point2f>(i);
                        Cv2.Circle(
                            img: img,
                            center: new Point(ipt.X, ipt.Y),
                            radius: 2,
                            color: colors[clusterIdx],
                            lineType: LineTypes.AntiAlias,
                            thickness: 1);
                    }

                    Cv2.ImShow("img", img);

                    // Wait for the key press while img is still alive; the using
                    // block disposes the Mats on break as well as on normal exit.
                    var key = (char)Cv2.WaitKey();
                    if (key == 27 || key == 'q' || key == 'Q') // 'ESC'
                    {
                        break;
                    }
                }
            }
        }
    }
}
C++版本代码如下:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
using namespace cv;
using namespace std;
int main()
{
const int maxClusters = 5;
cv::RNG rng(12345);
while (true)
{
int clustersCount = rng.uniform(2, maxClusters + 1);
int samplesCount = rng.uniform(1, 1001);
Mat points(samplesCount,1, CV_32FC2);
clustersCount = min(clustersCount, samplesCount);
Mat img(500, 500, CV_8UC4, Scalar::all(0));
// generate random sample from multi-gaussian distribution
for (int k = 0; k < clustersCount; k++)
{
Mat pointChunk = points.rowRange(
k * samplesCount / clustersCount,
(k == clustersCount - 1) ? samplesCount : (k + 1) * samplesCount / clustersCount);
Point center(rng.uniform(0, img.cols), rng.uniform(0, img.rows));
rng.fill(
pointChunk,
cv::RNG::NORMAL,
Scalar(center.x, center.y),
Scalar(img.cols * 0.05f, img.rows * 0.05f));
}
cv::randShuffle(points, 1, &rng);
Mat labels;
Mat centers(clustersCount, 1, points.type());
cv::kmeans(
points,
clustersCount,
labels,
TermCriteria(TermCriteria::EPS | TermCriteria::MAX_ITER, 10, 1.0),
3,
KmeansFlags::KMEANS_PP_CENTERS,
centers);
vector<Scalar> colors;
colors.push_back(Scalar(0, 0, 255));
colors.push_back(Scalar(0, 255, 0));
colors.push_back(Scalar(255, 100, 100));
colors.push_back(Scalar(255, 0, 255));
colors.push_back(Scalar(0, 255, 255));
for (int i = 0; i < samplesCount; i++)
{
int clusterIdx = labels.at<int>(i);
Point2f ipt = points.at<Point2f>(i);
cv::circle(
img,
Point(ipt.x, ipt.y),
2,
colors[clusterIdx],
LineTypes::LINE_AA,
1);
}
cv::imshow("img", img);
char key = (char)cv::waitKey();
if (key == 27 || key == 'q' || key == 'Q') // 'ESC'
{
break;
}
}
return 0;
}
Python版本代码如下:
demo1:仅有一个特征的数据
运行输出结果如下,和PDF中的效果好像不太一样(可能是因为样本数据是随机生成的,每次运行的结果都会不同)。
假设我们有一组数据,每个数据只有一个特征(1维)。例如前面的T恤问题,我们只使用人们的身高来决定T恤的大小。
我们先来产生一些随机数据,并使用 Matplotlib 将它们绘制出来
代码如下:
import numpy as np
import cv2
from matplotlib import pyplot as plt
# Demo 1: k-means on data with a single feature.
# Draw two groups of 25 random integers each: one from [25, 100), one from [175, 255).
x = np.random.randint(25, 100, 25)
y = np.random.randint(175, 255, 25)

# Join both groups into one vector of length 50, reshape it into a 50x1 column
# vector (one sample per row, one feature per column -- the layout that is also
# used when each sample has several features) and convert it to np.float32
# before it is handed to cv2.kmeans.
z = np.concatenate((x, y)).reshape(-1, 1).astype(np.float32)

# Show the raw, unclustered data as a histogram.
plt.hist(z, bins=256, range=[0, 256])
plt.show()
# exit(0)

# Termination criteria for k-means: stop after 10 iterations or once the
# requested accuracy epsilon = 1.0 is reached.
# criteria = (type, max_iter = 10, epsilon = 1.0)
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)

# Initial centres are chosen at random.
flags = cv2.KMEANS_RANDOM_CENTERS

# Run k-means with K = 2 and 10 attempts.
# It returns the compactness, one label per sample (0 or 1 here) and the
# cluster centres; the original tutorial reports centres of about 60 and 207.
compactness, labels, centers = cv2.kmeans(z, 2, None, criteria, 10, flags)

# Split the samples into two groups according to their label.
A = z[labels == 0]
B = z[labels == 1]

# Plot group A in red, group B in blue and the centres in yellow.
for samples, colour in ((A, 'r'), (B, 'b')):
    plt.hist(samples, bins=256, range=[0, 256], color=colour)
plt.hist(centers, bins=32, range=[0, 256], color='y')
plt.show()
demo2:含有多个特征的数据
在前面的T恤例子中我们只考虑了身高,现在我们也把体重考虑进去,也就是两个特征。
在前一节我们的数据是一个单列向量。每一个特征被排列成一列,每一行对应一个测试样本。在本例中我们的测试数据是一个 50x2 的矩阵,其中包含 50 个人的身高和体重。第一列对应于身高,第二列对应于体重。第一行包含两个元素,第一个是第一个人的身高,第二个是第一个人的体重。剩下的行对应于其他人的身高和体重。
代码如下:
import numpy as np
import cv2
from matplotlib import pyplot as plt
# Demo 2: k-means on data with two features per sample.
# Two groups of 25 samples each; every row is one sample, every column one feature.
X = np.random.randint(25, 50, (25, 2))
Y = np.random.randint(60, 85, (25, 2))

# Stack both groups into a single 50x2 array and convert it to np.float32.
Z = np.vstack((X, Y)).astype(np.float32)

# Stop after 10 iterations or once epsilon = 1.0 is reached, then run
# k-means with K = 2, 10 attempts and randomly chosen initial centres.
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret, label, center = cv2.kmeans(Z, 2, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

# Separate the samples by cluster. The labels are flattened with ravel() so the
# boolean mask selects whole rows of Z.
flat_label = label.ravel()
A = Z[flat_label == 0]
B = Z[flat_label == 1]

# Plot cluster A (default colour), cluster B (red) and the cluster centres
# (large yellow squares), using column 0 as x and column 1 as y.
plt.scatter(A[:, 0], A[:, 1])
plt.scatter(B[:, 0], B[:, 1], c='r')
plt.scatter(center[:, 0], center[:, 1], s=80, c='y', marker='s')
plt.xlabel('Height')
plt.ylabel('Weight')
plt.show()