当前位置：首页 > news >正文

flash网站开源扬州天达建设集团有限公司网站

news 2026/1/9 21:34:46

"""Clustering: hierarchical clustering and its application.

Part 1 builds an agglomerative (bottom-up) cluster tree over the rows of a
numeric array: ``cluster_node``, ``hcluster`` and the tree helpers.

Part 2 is a demo that clusters the ``.jpg`` images of a directory by their
mean RGB colour and draws the resulting dendrogram with Pillow.
Part 2 is adapted from:
https://blog.csdn.net/weixin_41790863/article/details/81412564
"""
import math
import os
import sys

import numpy as np

try:
    from PIL import Image, ImageDraw
except ImportError:
    # Pillow is only needed by the image demo in part 2; the numeric
    # clustering functions of part 1 stay importable without it.
    Image = ImageDraw = None


# ---------------------------------------------------------------------------
# Part 1: hierarchical clustering of feature vectors
# ---------------------------------------------------------------------------

class cluster_node:
    """One node of the cluster tree (a leaf or a merge of two sub-trees)."""

    def __init__(self, vec, left=None, right=None, distance=0.0, id=None,
                 count=1):
        """Store the node data.

        vec      -- feature vector of the node (a numpy array)
        left     -- left child, or None for a leaf
        right    -- right child, or None for a leaf
        distance -- distance between the two children that were merged
        id       -- node identifier; hcluster gives leaves their row index
                    (>= 0) and merged nodes negative ids
        count    -- node counter (only meant for weighted averages; it is
                    not read anywhere in this file)
        """
        self.vec = vec
        self.left = left
        self.right = right
        self.distance = distance
        self.id = id
        self.count = count


def L2dist(v1, v2):
    """Return the Euclidean (straight-line) distance between two vectors."""
    return math.sqrt(np.sum((v1 - v2) ** 2))


def L1dist(v1, v2):
    """Return the sum of absolute coordinate differences of two vectors."""
    return np.sum(np.abs(v1 - v2))


def hcluster(features, distance=L2dist):
    """Build the cluster tree for the rows of ``features``.

    features -- indexable collection of rows; each row becomes one leaf
    distance -- callable taking two vectors and returning their distance

    Repeatedly merges the two closest clusters (a merged cluster is
    represented by the plain average of its two children) until a single
    root remains, and returns that root ``cluster_node``.

    Raises IndexError when ``features`` is empty.
    """
    # Cache of pair distances keyed by (id, id); note this dict is distinct
    # from the ``distance`` callable.
    distances = {}
    currentid = -1

    # Initially every row is its own cluster.
    clust = [cluster_node(np.array(features[i]), id=i)
             for i in range(len(features))]

    while len(clust) > 1:
        lowestpair = (0, 1)
        closest = distance(clust[0].vec, clust[1].vec)

        # Only look at pairs with i < j: a node paired with itself has
        # distance 0 and would otherwise always be chosen.
        for i in range(len(clust)):
            for j in range(i + 1, len(clust)):
                key = (clust[i].id, clust[j].id)
                if key not in distances:
                    distances[key] = distance(clust[i].vec, clust[j].vec)
                d = distances[key]
                if d < closest:
                    closest = d
                    lowestpair = (i, j)

        first = clust[lowestpair[0]]
        second = clust[lowestpair[1]]

        # Of the possible linkage rules this one uses the average vector.
        mergevec = (first.vec + second.vec) / 2.0
        newcluster = cluster_node(np.array(mergevec), left=first,
                                  right=second, distance=closest,
                                  id=currentid)

        # Cluster ids that were not in the original set are negative.
        currentid -= 1

        # Delete the higher index first so the lower one is still valid.
        del clust[lowestpair[1]]
        del clust[lowestpair[0]]
        clust.append(newcluster)

    return clust[0]


def extract_cluster(clust, dist):
    """Return the sub-trees of ``clust`` whose merge distance is < ``dist``.

    Descends from ``clust`` and stops at the first node on each path whose
    ``distance`` is below the threshold; the result is a list of nodes.
    """
    if clust.distance < dist:
        # Found a cluster sub-tree.
        return [clust]

    cl = []
    cr = []
    if clust.left is not None:
        cl = extract_cluster(clust.left, dist=dist)
    if clust.right is not None:
        cr = extract_cluster(clust.right, dist=dist)
    return cl + cr


def get_cluster_elements(clust):
    """Return the list of leaf ids contained in the sub-tree ``clust``."""
    if clust.id >= 0:
        # A non-negative id marks a leaf; wrap it so results concatenate.
        return [clust.id]

    cl = []
    cr = []
    if clust.left is not None:
        cl = get_cluster_elements(clust.left)
    if clust.right is not None:
        cr = get_cluster_elements(clust.right)
    return cl + cr


def printclust(clust, labels=None, n=0):
    """Print the tree, one node per line, indented one space per level.

    labels -- optional sequence indexed by leaf id; when given, the label
              is printed instead of the id
    n      -- current depth (used by the recursion)
    """
    for _ in range(n):
        print(' ', end='')
    if clust.id < 0:
        # A negative id marks a branch.
        print('-')
    elif labels is None:
        print(clust.id)
    else:
        print(labels[clust.id])

    if clust.left is not None:
        printclust(clust.left, labels=labels, n=n + 1)
    if clust.right is not None:
        printclust(clust.right, labels=labels, n=n + 1)


def getheight(clust):
    """Return the number of leaves below ``clust`` (1 for a leaf)."""
    if clust.left is None and clust.right is None:
        return 1
    return getheight(clust.left) + getheight(clust.right)


def getdepth(clust):
    """Return the accumulated merge distance along the deepest branch.

    A leaf has depth 0; a branch has the larger depth of its two children
    plus its own ``distance``.
    """
    if clust.left is None and clust.right is None:
        return 0
    return max(getdepth(clust.left), getdepth(clust.right)) + clust.distance


# ---------------------------------------------------------------------------
# Part 2: demo - cluster images by mean colour and draw the tree
# ---------------------------------------------------------------------------

nodeList = []  # every node created so far: image leaves and merged nodes
distance = {}  # {(earlierNode, laterNode): Euclidean feature distance}


def _require_pillow():
    """Raise a clear error when the image demo is used without Pillow."""
    if Image is None or ImageDraw is None:
        raise ImportError("Pillow (PIL) is required for the image demo")


class node:
    """A sample (image) or the merge of two nodes in the image demo."""

    def __init__(self, data):
        """Create the node and register it in ``nodeList``/``distance``.

        data -- either a str (path of an image; the node is a leaf) or a
                pair ``(leftNode, rightNode)`` (the node is their merge)
        """
        self.id = len(nodeList)  # creation index; informational only
        self.parent = None       # set when this node gets merged
        self.pos = None          # (x, y, w, h) used when drawing

        if isinstance(data, str):
            _require_pillow()
            # Convert to RGB so the (-1, 3) reshape below is valid for
            # grayscale/CMYK/RGBA files too; ``with`` closes the file.
            with Image.open(data) as source:
                self.imgData = source.convert('RGB')
            self.left = None
            self.right = None
            self.level = 0  # all images sit on level 0 of the drawing
            pixels = np.array(self.imgData).reshape(-1, 3)
            self.feature = pixels.mean(axis=0)  # mean of each channel
        else:
            self.imgData = None
            self.left = data[0]
            self.right = data[1]
            self.left.parent = self
            self.right.parent = self
            self.level = max(self.left.level, self.right.level) + 1
            self.feature = (self.left.feature + self.right.feature) / 2

        # Distance of this node to every node created before it.
        for other in nodeList:
            distance[(other, self)] = np.sqrt(
                np.sum((other.feature - self.feature) ** 2))
        nodeList.append(self)

    def drawNode(self, img, draw, vLineLenght):
        """Draw this node onto ``img`` using ``draw``; no-op without pos.

        An image leaf is pasted as a thumbnail with a vertical line of
        length ``vLineLenght`` above it; a merged node is drawn as a
        horizontal line plus a vertical line of height ``pos[3]``.
        """
        if self.pos is None:
            return
        x, y, w, h = self.pos
        red = (255, 0, 0)
        middle = int(x + w / 2)
        if self.left is None:
            self.imgData.thumbnail((w, h))  # shrinks the image in place
            img.paste(self.imgData, (x, y))
            draw.line((middle, y - vLineLenght, middle, y), fill=red)
        else:
            draw.line((int(x), y, int(x + w), y), fill=red)
            draw.line((middle, y, middle, y - h), fill=red)


def loadImg(path):
    """Create a leaf ``node`` for every ``.jpg`` file in ``path``.

    Returns the path of the result image to write
    (``hierarchicalResult.jpg`` inside ``path``; a file of that name is
    skipped when loading), or None when the directory cannot be listed.
    """
    try:
        files = os.listdir(path)
    except OSError:
        print('未正确读取目录：' + path
              + ',图片目录请根据自己存的地方改写,'
              + '并保证没有hierarchicalResult.jpg,该文件为最后生成文件')
        return None

    for name in files:
        stem, ext = os.path.splitext(name)
        if ext.lower() == '.jpg' and stem.lower() != 'hierarchicalresult':
            node(os.path.join(path, name))
    return os.path.join(path, 'hierarchicalResult.jpg')


def getMinDistance():
    """Return the closest pair among nodes that have no parent yet.

    The result is a key of ``distance``, i.e. a ``(node, node)`` tuple.
    Raises IndexError when no such pair exists.
    """
    candidates = [pair for pair in distance
                  if pair[0].parent is None and pair[1].parent is None]
    if not candidates:
        raise IndexError("no pair of unmerged nodes is left")
    return min(candidates, key=distance.__getitem__)


def createTree():
    """Merge the closest unmerged nodes until one root is left; return it.

    Raises IndexError when ``nodeList`` is empty.
    """
    if not nodeList:
        raise IndexError("nodeList is empty: no images were loaded")
    while sum(1 for item in nodeList if item.parent is None) > 1:
        minDist = getMinDistance()
        # The later-created node of the pair becomes the left child; the
        # drawing code in run() relies on this orientation.
        node((minDist[1], minDist[0]))
    return nodeList[-1]  # the node inserted last is the root


def _collect_leaves(tree):
    """Return the leaves below ``tree`` in left-to-right order."""
    if tree.left is None:
        return [tree]
    found = _collect_leaves(tree.left)
    if tree.right is not None:
        found += _collect_leaves(tree.right)
    return found


def run():
    """Build the tree over ``nodeList`` and save the drawing.

    The image is written to the module-level ``resultFile`` path.
    """
    _require_pillow()
    root = createTree()
    leaves = _collect_leaves(root)

    thumbSize = 60     # thumbnails are fitted into a 60x60 cell
    thumbSpace = 20    # gap between thumbnails
    vLineLenght = 80   # vertical distance between two levels
    imgWidth = len(leaves) * (thumbSize + thumbSpace)
    imgHeight = (root.level + 1) * vLineLenght + thumbSize + thumbSpace * 2
    img = Image.new('RGB', (imgWidth, imgHeight), (255, 255, 255))
    draw = ImageDraw.Draw(img)

    # Place every image just below the level of its parent.
    for index, leaf in enumerate(leaves):
        # A lone image is the root and has no parent; treat it as level 1.
        parentLevel = leaf.parent.level if leaf.parent is not None else 1
        x = index * (thumbSize + thumbSpace) + thumbSpace / 2
        y = (imgHeight - thumbSize - thumbSpace / 2
             - (parentLevel - 1) * vLineLenght)
        w = leaf.imgData.width
        h = leaf.imgData.height
        # Scale the longer side to thumbSize, keeping the aspect ratio.
        if w > h:
            h = h / w * thumbSize
            w = thumbSize
        else:
            w = w / h * thumbSize
            h = thumbSize
        x += (thumbSize - w) / 2  # centre the thumbnail in its cell
        leaf.pos = (int(x), int(y), int(w), int(h))
        leaf.drawNode(img, draw, vLineLenght)

    # Place merged nodes level by level so children are positioned first.
    for level in range(1, root.level + 1):
        for item in [n for n in nodeList if n.level == level]:
            x = item.left.pos[0] + item.left.pos[2] / 2
            w = item.right.pos[0] + item.right.pos[2] / 2 - x
            y = (item.left.pos[1]
                 - (item.level - item.left.level) * vLineLenght)
            upper = (item.parent.level if item.parent is not None
                     else item.level + 1)
            h = (upper - item.level) * vLineLenght
            item.pos = (int(x), int(y), int(w), int(h))
            item.drawNode(img, draw, vLineLenght)

    img.save(resultFile)


# Load the images and get the name of the result file; change the
# directory to wherever the data set is stored.
resultFile = loadImg(r'G:\Pythonnotes\test\HierarchicalClusterDataset')
if resultFile is not None:
    run()
    print('结构图生成成功，最终结构图存储于：' + resultFile)

查看全文

http://wiki.neutronadmin.com/news/480309/