博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
K均值算法-python实现
阅读量:6692 次
发布时间:2019-06-25

本文共 15807 字,大约阅读时间需要 52 分钟。

 

 

测试数据展示:

 

 
# coding:utf-8
"""
K-means clustering implemented in pure Python.

Algorithm summary:
-----------------------------
Input:  all data points A, number of clusters k
Output: the center points of the k clusters

pick k initial centers at random
repeat:
    compute the distance from every point to every center and assign the
    point to the cluster of its nearest center
    recompute the center of every cluster
until the centers are stable
-----------------------------
"""
__author__ = 'similarface'

import sys
import random
import math
from collections import defaultdict

# Two-dimensional sample points used by the demo at the bottom of the file.
data = [
    [-1.97869191, -10.41036729], [-5.98733291, 1.09319552],
    [-3.32042501, -9.43521984], [-6.95990143, 1.48643384],
    [-0.51581431, -2.95589335], [-6.98576681, 2.3764377],
    [-5.80752805, -0.01880673], [-2.3875164, -3.79858985],
    [-1.50834952, -11.55108763], [-0.31932001, -8.72310502],
    [-0.10604775, -1.97508279], [-6.3698932, 2.17096658],
    [0.98564842, -2.22738281], [-0.9889725, -3.47291703],
    [-1.03011438, -1.02557957], [-0.45064353, -8.8508534],
    [0.72986148, -1.97091264], [-0.24707289, -9.8104778],
    [-3.30332765, 3.32133548], [1.02581503, -2.93411237],
    [-0.77207963, -3.88246058], [1.11435514, -2.1403809],
    [-3.01806146, -8.44065141], [2.07905596, -0.39137275],
    [-1.3132102, -5.21553485], [2.37703059, -1.1452029],
    [-3.71486953, -9.36874886], [-0.44554402, -3.2977466],
    [-1.07589398, -8.07477912], [-7.2774513, 3.49894912],
    [-1.29923245, -8.30647414], [-1.39638106, -4.26919995],
    [-2.23638886, -5.40208811], [-2.50153112, -7.67699872],
    [-0.72081785, -1.37019171], [-5.89699295, 2.86422394],
    [-0.66995787, -9.05797846], [-6.2936531, 1.50084162],
    [-3.14819261, -7.7502907], [0.77192861, -2.46670777],
    [-1.38115313, -1.23503221], [-5.21920316, 2.77710219],
    [-2.18234803, -10.07244764], [-1.80391665, -5.05103832],
    [0.67755635, -2.64355425], [-0.52813711, -10.47523635],
    [-4.22948326, 1.0542576], [1.7408485, -1.52644915],
    [-0.6172497, -0.79361238], [-6.17304838, 1.40648868],
    [-2.4369522, -3.95312369], [-2.33805418, -10.39048298],
    [-0.89823572, -8.91519992], [-5.49292357, 1.36243111],
    [-0.4075959, -0.98602662], [-1.46335853, -8.46162063],
    [1.0505005, -1.13446366], [-6.54744623, 2.29309604],
    [-6.41410112, 2.1273699], [2.49020135, -2.82466326],
    [-2.5027096, -2.21684939], [1.17331624, -1.36048319],
    [2.92640652, -2.36434847], [-1.57138311, -2.48238607],
    [0.14325405, -2.14589394], [-2.64755963, -4.57606078],
    [1.77939563, -1.09938345], [-1.74705392, -5.14259238],
    [-0.65895073, -1.96921394], [-1.40612244, -3.5517328],
    [-2.20539552, -4.7867456], [0.88989739, -0.44520158],
    [-6.21503005, 1.05650418], [-1.61030464, -3.15726266],
    [-6.21496271, 2.16858806], [-1.01573424, -2.67549788],
    [-0.4681611, -4.89114339], [1.36979547, -1.42903191],
    [-0.90965742, -8.1218415], [-2.57921362, -1.84976724],
    [-7.01537899, 1.79532873], [-4.51668131, 0.73373973],
    [-6.55623248, -0.04283413], [0.37487407, -0.91475768],
    [0.38115481, -1.64481461], [-4.11222325, -2.00214115],
    [-1.46957122, -9.55869403], [-6.87835953, 3.37557201],
    [-6.49999403, 2.69702331], [-2.9219904, -4.13889999],
    [1.62861332, 0.80867712], [-2.13652734, -3.20900184],
    [0.08713347, -8.26358973], [-0.61588054, -8.7465907],
    [-1.91357867, -3.14379003], [-1.51220857, 0.53244231],
    [0.99104311, -1.43284403], [-2.70008268, -3.56958972],
    [-5.8267567, 3.17894392], [1.10320057, -3.20707537],
    [1.70531079, -3.09426819], [0.89454062, -3.84466463],
    [-1.34578645, -4.86207938], [-0.58498235, -11.51494191],
    [1.05937597, -0.7579938], [-5.94171269, 3.08161308],
    [-5.39980072, 4.37525462], [0.73374694, -4.02735671],
    [-3.74456491, 3.04297057], [1.70084242, -1.17949827],
    [-6.44717333, 2.13090812], [-4.61625936, 2.74952795],
    [0.42186795, -1.00112008], [-2.48625317, -2.64140122],
    [-0.16344961, -0.05951747], [0.82017839, -1.68889855],
    [1.8084556, -3.36847451], [0.30428829, -4.02238273],
    [-0.45539895, -8.23326244], [-0.17095868, -11.24639309],
    [-1.47484741, -3.92998889], [-4.84939275, 3.22778867],
    [-1.77602069, -4.55311048], [0.30937327, -3.97368662],
    [0.5124909, -9.91048868], [-0.80962387, -3.80036663],
    [-0.40311582, -3.37210203], [-1.28940953, -4.22317842],
    [-3.4964651, -4.59144396], [-0.90788399, -5.10084263],
    [-0.74104364, -10.07763506], [-1.2800922, -8.93912279],
    [-1.66664693, -4.37979994], [-0.27973607, -9.86256788],
    [0.39010877, -1.25159452], [0.55523077, -9.04078549],
    [2.60842583, -0.86794594], [-0.84660563, -8.20309613],
    [-6.42496164, 2.96670557], [1.92513692, 0.17477999],
    [-2.15713739, -5.48111104], [-5.82420484, 1.29802453],
    [-2.19316436, -1.77843034], [-1.87385754, -8.3319748],
    [-2.53552918, -8.54331169], [-5.04349522, 1.48053745],
    [-0.32431771, -3.31914574], [-1.60368203, -9.05765066],
    [-3.93955141, -9.57292799], [-2.4311049, -9.90799783],
    [-1.66161844, -9.41498635], [-0.8118896, -4.44914322],
    [-1.02353151, -10.47025441], [1.32915795, 0.61082376],
    [-4.85863866, 2.71818185], [-1.1656265, -2.98631583],
    [-2.57539962, -1.30662085], [-5.14115986, 2.11918259],
    [2.24169986, -2.5392787], [-2.44130996, -3.06176393],
    [1.99964344, -8.51061404], [-1.69941844, -9.61380426],
    [-1.15527831, -8.72497322], [-1.65805364, -2.62718768],
    [-3.12203531, -3.70129132], [-1.91775697, -10.66908765],
    [-3.52654286, 1.509838], [0.20550002, -0.86879848],
    [-1.39452325, -9.97964956], [-0.8835818, -9.7960928],
    [-5.47503834, 1.3675566], [0.29507856, -1.05360095],
    [0.81825271, -2.73069558], [-1.65577138, -9.55594613],
    [-0.10657046, -11.82507855], [-6.43716673, 1.12621231],
    [-2.68080553, -4.15115913], [-5.46682052, 4.6280828],
    [1.57945366, -1.31393944], [2.44441946, -2.35848003],
    [-2.25041232, -4.64475199], [-5.93596316, 3.55810189],
    [-6.08327339, 1.26745748], [1.506501, -1.62839627],
    [1.78230921, -2.91522595], [2.82859067, -8.66035449],
    [-0.66876118, -9.43350477], [0.91100418, -1.59821873],
    [-0.16221522, -9.75571745], [0.15873435, -2.29051183],
    [-7.5806633, 2.88460368], [-2.97040189, -2.2072549],
    [-5.95789399, 1.05100704], [-2.41323523, -9.04838281],
    [-5.05820587, 1.75215814], [-1.30140995, -3.59063453],
    [-1.29343329, -2.7140364], [-6.15517065, 2.47899111],
    [-1.59361015, -8.71046363], [0.82608078, -1.87016308],
    [1.71860282, -1.65302661], [0.31995672, -3.43856678],
    [-1.9993558, -10.06488996], [-5.20501379, 2.0292834],
    [1.13908963, -1.6936582], [-6.16415229, 2.24844103],
    [-0.41050376, -10.56856594], [-7.29419673, 2.10875296],
    [0.68097889, 0.85112594], [1.1848232, -1.95576116],
    [-6.13784033, 3.27454164], [-4.94592301, 3.50193532],
    [-0.89962999, -9.69861063], [-5.31271816, 2.30731199],
    [-1.27736788, -1.52709537], [0.33970811, 0.11528184],
    [-1.74223531, -3.63574418], [-5.11750476, 2.30467137],
    [-1.60515159, -10.04170987], [-1.75660679, -3.02168142],
    [-1.09969215, -8.92831109], [-8.09999402, 2.98488494],
    [-5.4498388, 2.21471778], [-1.77514158, -5.22156992],
    [-1.06398595, -11.34008775], [-1.07153453, -4.10149796],
    [-7.24043131, 1.91557865], [-6.33736287, 0.43514226],
    [0.62173043, 1.86741382], [-2.11753563, -3.98311226],
    [0.46171023, -9.92897624], [2.82419621, -0.35337615],
    [-7.72527978, 4.42206927], [-0.49463392, -4.41118163],
    [-7.36970566, 1.76857486], [-8.07564582, 1.72023916],
    [-2.10923725, -9.39376515], [-0.91504844, -8.70739333],
    [2.6351642, -0.98185444], [-2.41442044, -10.18889625],
    [2.02143446, -2.01543187], [-0.92096863, -8.85925495],
    [-2.17903191, -1.65878724], [-6.25233557, 2.33764219],
    [-1.60598371, -4.16162683], [-1.64458105, -10.35745484],
    [-1.03866233, -8.98404971], [-2.76256743, -8.63516347],
    [0.80420551, -1.74288075], [-0.03026543, -1.74172697],
    [-1.93726763, -5.39538281], [-3.3712446, -3.89409507],
    [-1.61892392, -9.71765939], [-5.69386864, 3.93793276],
    [-5.34498618, 2.0693253], [-0.77824475, -10.32568907],
    [-1.80769409, -4.46833214], [-1.68399423, -10.86599403],
    [-1.3196722, -9.15547193], [-0.06811619, -1.40206897],
    [-1.07371903, -3.88629849], [-1.73432981, -8.96710465],
    [-2.18736646, -3.70811542], [-7.1865842, 3.11806934],
    [-2.90291449, -3.02986961], [-1.93061611, -3.05009085],
    [1.27033628, -10.95464861], [-2.30151669, -9.04907966],
    [0.21944157, -5.44956932], [-4.95790559, 2.43632632],
    [-5.09335092, 2.47355038], [-5.33075221, 2.57934775],
    [-4.91352172, 0.70785394], [1.07861399, -0.62821787],
    [-5.61777478, 2.78571681], [-0.75580553, -8.74619579],
    [-6.84289623, 3.12082979], [-1.69560499, -2.72407455],
    [0.18757605, -9.8171527], [-2.34090099, -8.88980884],
    [-1.02993907, -3.97537434], [-7.952894, 2.87991319],
    [-5.90898023, 1.15375484], [-0.20175034, -1.78148269],
    [-5.03133839, 2.37464369], [-5.35976552, 2.11910146],
    [1.38354601, -0.31804274], [-6.29083717, 3.60726959],
    [-2.41154316, -3.72594284], [1.70677401, 1.23618273],
    [1.83086535, -1.42523455], [-1.09860809, -1.84702593],
    [-2.67919211, -3.62422108], [-6.77223728, 4.13723749],
    [-1.74210731, -9.13058687], [-2.67557352, -4.3499291],
    [-2.45517504, -5.13617648], [-1.54915892, -7.25010857],
    [1.81313467, -1.92467083], [-1.5841884, -6.8961805],
    [-1.19769074, -4.59711705], [-5.40166242, 3.12407116],
    [-0.67858614, -9.47781587], [0.83352543, -0.74460559],
    [-2.47535278, -2.50855939], [-1.42824915, -7.98003845],
    [-8.01058566, 1.63404449], [2.0119666, 1.00882614],
    [-4.81816885, 3.72073108], [-6.27164232, 0.74780494],
    [-5.65408139, 1.0799859], [2.13810493, -1.44566983],
    [2.61434254, -0.58086887], [-4.6059069, 1.38411417],
    [0.44030012, -0.99402533], [-1.91478126, -8.97307912],
    [-6.36433615, 1.6497788], [-0.07381757, -1.94648329],
    [-0.72864791, -7.18926735], [-5.59867106, 1.66313127],
    [0.70973004, -0.75512788], [-1.33633557, -2.6256785],
    [-2.15820985, -9.63790953], [-4.33013714, 0.97871974],
    [-1.74844822, -10.73619567], [-0.16767692, -3.84016148],
    [-2.02797291, -9.47245011], [-6.19473103, 2.41547938],
    [-2.73346631, -9.81949314], [1.05371201, -2.63214103],
    [1.59306999, -0.74416768], [-6.4721467, 2.95054106],
    [-5.02266832, 2.53430552], [-1.20943949, -3.81029773],
    [-2.39099269, -3.38764578], [1.86409032, -0.70074535],
    [-0.8131639, -2.36670563], [-1.55628145, -9.99835926],
    [-1.9233198, -3.10609538], [-6.09795188, 1.76016581],
    [-0.13265422, -0.80505548], [1.40927131, -1.35139941],
    [-6.06728988, 2.43844581], [-6.77797943, 2.21185794],
    [-3.09368405, -5.90874304], [1.55591864, -1.4580672],
    [2.54154025, -1.60938019], [-3.50927448, -5.62064487],
    [-5.61928015, 3.08987021], [-1.07380783, -3.02885557],
    [0.15881217, -1.95127059], [-1.93335222, -8.63901908],
    [1.62504848, -0.87595942], [1.8230386, -0.9574862],
    [-0.43711337, -3.64783404], [0.110124, 0.25183468],
    [-0.48092196, -8.2188617], [-1.95777753, -10.92091439],
    [1.3203166, -2.74891159], [1.54591325, -2.13399516],
    [-0.11858047, -9.09539732], [-8.03689652, 1.51829382],
    [2.12599563, -0.9232473], [-6.6466344, 1.43966762],
    [-2.53164296, -2.76452777], [-2.82677657, -11.5641273],
    [-2.60710702, -4.50856754], [0.05546421, -9.17884603],
    [-2.3788409, -7.89698831], [-5.90466798, 3.09117187],
    [-6.37783409, 2.28944986], [-6.3933459, 2.05685086],
    [0.68989568, -3.88908243], [0.51326445, -2.31320125],
    [-1.44144678, -2.6282341], [-2.69891251, -9.73324948],
    [-0.28764562, -0.98370587], [0.25165836, -11.46732114],
    [-4.95384992, 2.04017736], [-0.85715442, -0.96306408],
    [2.10389484, -1.66689096], [-6.81038823, 2.37777702],
    [0.78454593, -1.15593416], [-1.45368824, -3.27385342],
    [-0.37638912, 1.3767851], [-5.06831433, 2.53524728],
    [-2.70131918, -9.63497056], [-1.23856256, -10.59940081],
    [-1.93958449, -2.98186006], [-0.30387455, -3.25837812],
    [-4.98980684, 3.66124623], [-2.84011639, -3.47084983],
    [-1.68584182, -3.25767216], [-0.58390398, -8.78405909],
    [-0.78216181, -9.35497119], [-5.58569152, 1.43897246],
    [-1.65427904, -4.34620073], [-1.38595406, -3.46417994],
    [0.57884096, -1.22623874], [2.03872755, 0.07546388],
]


def calc_geometric_distance(pointA, pointB):
    '''
    Compute the Euclidean distance between two points of any dimension.

    :param pointA: sequence of coordinates of the first point
    :param pointB: sequence of coordinates of the second point
    :return: the distance as a float
    :raises ValueError: if the two points do not have the same number of
        coordinates (a silent ``None`` here would poison the ``min()``
        comparison done by the caller)
    '''
    if len(pointA) != len(pointB):
        raise ValueError(
            'points must have the same dimension: %d != %d'
            % (len(pointA), len(pointB)))
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(pointA, pointB)))


def getInitCenter(data, k):
    '''
    Randomly choose k of the data points as the initial centers.

    :param data: list of points
    :param k: number of centers to pick
    :return: list of k points taken from distinct positions of ``data``
    :raises ValueError: if k is larger than ``len(data)``
    '''
    # random.sample never indexes past the end of the list (unlike
    # random.randint(0, len(data)), whose upper bound is inclusive) and
    # never returns the same position twice.
    return random.sample(data, k)


def cluser(dataset):
    '''
    Assign every point to the cluster of its nearest center.

    :param dataset: mapping of point index -> list of distances from that
        point to each center, in center order
    :return: mapping of cluster number (1-based position of the nearest
        center) -> list of point indexes assigned to it
    '''
    k_dataset = defaultdict(list)
    for point_idx, dists in dataset.items():
        # On ties list.index() returns the first minimum, i.e. the
        # lowest-numbered center wins.
        k_dataset[dists.index(min(dists)) + 1].append(point_idx)
    return k_dataset


def cluser_data(data, data_idx):
    '''
    Turn clusters of point indexes back into clusters of actual points.

    :param data: list of points
    :param data_idx: mapping of cluster number -> list of indexes into data
    :return: mapping of cluster number -> list of points
    '''
    return dict(
        (key, [data[idx] for idx in indexes])
        for key, indexes in data_idx.items())


def _mean_point(points):
    '''Return the coordinate-wise mean of a non-empty list of points.'''
    if not points:
        raise ValueError('cannot compute the center of an empty cluster')
    count = float(len(points))
    return [sum(coords) / count for coords in zip(*points)]


def calc_center(clusterdataset):
    '''
    Compute the center (coordinate-wise mean) of every cluster.

    :param clusterdataset: mapping of cluster number -> list of points
    :return: list of centers, ordered by ascending cluster number so the
        result does not depend on dict iteration order
    :raises ValueError: if any cluster is empty
    '''
    return [_mean_point(clusterdataset[key]) for key in sorted(clusterdataset)]


def k_means(data, initcenter, i, max_iter=100):
    '''
    Run k-means until the centers stop moving.

    Every iteration prints the freshly computed centers followed by the
    iteration index.

    :param data: list of points
    :param initcenter: list of initial centers
    :param i: index of the first iteration (used for printing and for the
        iteration limit)
    :param max_iter: last iteration index that is still executed; the
        current centers are returned once it is reached
    :return: the list of centers, in the same order as ``initcenter``
    '''
    centers = initcenter
    while True:
        # Distance from every point to every current center.
        distances = dict(
            (idx, [calc_geometric_distance(point, center)
                   for center in centers])
            for idx, point in enumerate(data))
        members = cluser_data(data, cluser(distances))
        # A center that attracted no point keeps its position; dropping it
        # would silently reduce k.
        new_centers = [
            _mean_point(members[key]) if key in members else center
            for key, center in enumerate(centers, 1)
        ]
        print('%s %s' % (new_centers, i))
        # Unchanged assignments give bit-identical means, so exact equality
        # is a valid convergence test.
        if new_centers == centers or i >= max_iter:
            return new_centers
        centers = new_centers
        i += 1


if __name__ == '__main__':
    k = 4
    i = 0
    initcenter = getInitCenter(data, k)
    k_means(data, initcenter, i)

# Sample output recorded with the original script (centers followed by the
# iteration index); the last two lines are identical, i.e. the centers have
# stopped moving:
#
# [[-1.090354490561798, -9.544300641123598],
#  [-3.4009604193589746, -3.448081201923078],
#  [-0.19541685830769231, -3.3116750070769227],
#  [-2.446104015535713, 0.7224692077976191]] 0
# [[-1.2749446563000004, -9.409093471199999],
#  [-2.470575770652174, -3.807719888695652],
#  [0.35515101615942024, -2.3586021581159424],
#  [-5.119257947586205, 1.9765036246551722]] 1
# [[-1.2749446563000004, -9.409093471199999],
#  [-1.967171268292682, -3.7557153614634147],
#  [0.8185451994915249, -1.6050642896610166],
#  [-5.937498566799999, 2.2599845357999997]] 2
# [[-1.2749446563000004, -9.409093471199999],
#  [-1.7570419110638291, -3.732020352340426],
#  [0.9475681998113205, -1.3826069122641511],
#  [-5.937498566799999, 2.2599845357999997]] 3
# [[-1.2749446563000004, -9.409093471199999],
#  [-1.6700011342424235, -3.7007944505050507],
#  [0.9961425923762373, -1.2969068833663366],
#  [-5.937498566799999, 2.2599845357999997]] 4
# [[-1.2749446563000004, -9.409093471199999],
#  [-1.6660748016999996, -3.6790574597],
#  [1.0188776970999995, -1.2946049985],
#  [-5.937498566799999, 2.2599845357999997]] 5
# [[-1.2749446563000004, -9.409093471199999],
#  [-1.6660748016999996, -3.6790574597],
#  [1.0188776970999995, -1.2946049985],
#  [-5.937498566799999, 2.2599845357999997]] 6
 

 

转载于:https://www.cnblogs.com/similarface/p/7467473.html

你可能感兴趣的文章
质量时代——“Jolt大奖精选丛书”有奖征文
查看>>
DNS服务器维护命令
查看>>
六、用户与权限
查看>>
面向机器学习数据平台的设计与搭建
查看>>
centos6.7 编译安装mysql-5.6.27
查看>>
spring cloud 整合zipkin问题
查看>>
Maven下载慢的解决方案
查看>>
我的友情链接
查看>>
Android 核心分析 之七------Service深入分析
查看>>
Regsvr32使用方法
查看>>
柱形图Demo
查看>>
编辑器
查看>>
关闭windows的默认共享
查看>>
react开发环境搭建
查看>>
数据库读写分离
查看>>
社交是微信营销
查看>>
2008 R2 证书服务器应用详解
查看>>
hive 动态分区太多问题
查看>>
Windows Server 2008 RemoteApp(二)---部署激活远程桌面授权服务器
查看>>
读取日志文件开发总结
查看>>