1.matlab函数
% k_means algorithm
function [u, myclass] = mykmeans(data, k, iter, threshold)
%MYKMEANS Partition the rows of DATA into K clusters with k-means.
%
%   [u, myclass] = mykmeans(data, k)
%   [u, myclass] = mykmeans(data, k, iter)
%   [u, myclass] = mykmeans(data, k, iter, threshold)
%
%   Inputs
%     data      - data to classify; columns are features, rows are samples.
%     k         - number of clusters.
%     iter      - maximum number of iterations. 0 (or omitted) means
%                 "use the number of samples".
%     threshold - stop early when the sum of squared differences between
%                 the centroids of two consecutive iterations drops below
%                 this value. A suitable value varies a lot between data
%                 sets, so the default is 0, which disables the early
%                 stop; convergence is then left to the iteration count.
%
%   Outputs
%     u         - k-by-(number of features) matrix of cluster centroids.
%     myclass   - k-by-(number of samples) indicator matrix;
%                 myclass(c, i) == 1 when sample i belongs to cluster c.

    % Optional arguments: the original header documented these defaults
    % but callers were still forced to pass all four arguments.
    if nargin < 3 || isempty(iter)
        iter = 0;
    end
    if nargin < 4 || isempty(threshold)
        threshold = 0;
    end

    [row, col] = size(data);

    % Seed the centroids with k distinct samples. Drawing with replacement
    % (as the removed RANDINT call did) can pick the same sample twice,
    % which starts the run with duplicate centroids. When k exceeds the
    % number of samples distinct seeds are impossible, so fall back to
    % sampling with replacement to stay compatible with old callers.
    if k <= row
        seed_idx = randperm(row, k);
    else
        seed_idx = randi(row, 1, k);
    end
    u = data(seed_idx, :);

    if iter == 0
        iter = row;
    end

    myclass = zeros(k, row);
    for j = 1:iter
        % Remember the centroids of the previous iteration so that the
        % convergence test measures the movement of this step only.
        u_backup = u;

        myclass = zeros(k, row);
        add = zeros(k, col);
        for i = 1:row
            % Squared Euclidean distance from sample i to every centroid.
            delta = bsxfun(@minus, u, data(i, :));
            distance = sum(delta .^ 2, 2);
            [~, min_num] = min(distance);
            myclass(min_num, i) = 1;  % mark the sample as a member of this cluster
            add(min_num, :) = add(min_num, :) + data(i, :);
        end

        for n = 1:k
            members = sum(myclass(n, :));
            % An empty cluster keeps its previous centroid; dividing by a
            % zero member count would turn it into NaN and poison every
            % later distance computation.
            if members > 0
                u(n, :) = add(n, :) / members;
            end
        end

        % Sum of squared centroid movement (same value as
        % trace((u - u_backup) * (u - u_backup)') without the k-by-k product).
        tr = sum(sum((u - u_backup) .^ 2));
        if threshold ~= 0 && tr < threshold
            break;
        end
    end
end
2.C#下混合编程
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using matPrj;
using MathWorks.MATLAB.NET.Arrays;
using MathWorks.MATLAB.NET.Utility;

namespace mykmeans
{
    /// <summary>
    /// Console driver that loads comma-separated samples from "data.txt" and
    /// clusters them by calling the compiled MATLAB function mykmeans.
    /// </summary>
    class Program
    {
        // Input file: one sample per line, fields separated by commas.
        private const string DataFile = "data.txt";

        // Number of values expected on every line (features per sample).
        private const int FeatureCount = 2;

        // Number of clusters requested from mykmeans.
        private const int ClusterCount = 2;

        static void Main(string[] args)
        {
            myMathclass myfun = new myMathclass();

            double[] data = LoadSamples(DataFile);
            // The row count comes from the file instead of a hard-coded 16,
            // so the program works for any number of samples.
            int row = data.Length / FeatureCount;
            if (row == 0)
            {
                throw new InvalidDataException(DataFile + " contains no samples.");
            }

            // NOTE(review): data is filled sample by sample (row-major), exactly
            // as in the original code. Confirm the element ordering expected by
            // this MWNumericArray constructor; if it is column-major, use the
            // overload that accepts a row-major flag or pass a double[,] instead.
            MWNumericArray m_data = new MWNumericArray(row, FeatureCount, data);

            MWArray[] output = new MWArray[2];
            // Arguments: data, k, iter (0 = default), threshold (0 = disabled).
            MWArray[] input = new MWArray[4] { m_data, ClusterCount, 0, 0 };
            myfun.mykmeans(2, ref output, input);

            // output[0]: cluster centroids, output[1]: membership matrix.
            MWNumericArray x1 = output[0] as MWNumericArray;
            MWNumericArray x2 = output[1] as MWNumericArray;
        }

        /// <summary>
        /// Reads every non-blank line of <paramref name="path"/> in a single pass
        /// and returns the parsed values flattened sample by sample.
        /// </summary>
        /// <param name="path">Path of the comma-separated input file.</param>
        /// <returns>All values, FeatureCount consecutive entries per sample.</returns>
        /// <exception cref="FormatException">
        /// A line does not hold exactly FeatureCount fields or a field is not a number.
        /// </exception>
        private static double[] LoadSamples(string path)
        {
            List<double> values = new List<double>();
            int lineNumber = 0;

            // "using" closes the reader even when parsing throws.
            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    lineNumber++;
                    if (line.Trim().Length == 0)
                    {
                        continue; // tolerate blank lines, e.g. a trailing newline
                    }

                    string[] fields = line.Split(',');
                    if (fields.Length != FeatureCount)
                    {
                        throw new FormatException(
                            "Line " + lineNumber + " of " + path + " must contain exactly " +
                            FeatureCount + " comma-separated values.");
                    }

                    foreach (string field in fields)
                    {
                        // Parse as double with the invariant culture: the original
                        // Convert.ToInt16 rejected fractional values and anything
                        // above 32767 even though the target buffer is double[].
                        values.Add(double.Parse(field.Trim(), CultureInfo.InvariantCulture));
                    }
                }
            }

            return values.ToArray();
        }
    }
}
3. 测试数据(分两类)
1,2
2,3
3,4
1,3
2,4
5,8
1,9
2,8
11,34
56,9
110,234
239,300
123,456
232,222
194,254
788,234
4.结果
u =
    8.4000    8.4000
  281.0000  283.3333

myclass =
  Columns 1 through 12
     1     1     1     1     1     1     1     1     1     1     0     0
     0     0     0     0     0     0     0     0     0     0     1     1
  Columns 13 through 16
     0     0     0     0
     1     1     1     1