首页 > 编程知识 正文

kmeans算法python实现,简单python脚本实例

时间:2023-05-04 23:53:37 阅读:108766 作者:2584

用darknet的txt文件生成anchors

gen_anchors.py '''Created on Feb 20, 2017@author: jumabek'''from os import listdirfrom os.path import isfile, joinimport argparse#import cv2import numpy as npimport sysimport osimport shutilimport random import mathwidth_in_cfg_file = 416.height_in_cfg_file = 416.def IOU(x,centroids): similarities = [] k = len(centroids) for centroid in centroids: c_w,c_h = centroid w,h = x if c_w>=w and c_h>=h: similarity = w*h/(c_w*c_h) elif c_w>=w and c_h<=h: similarity = w*c_h/(w*h + (c_w-w)*c_h) elif c_w<=w and c_h>=h: similarity = c_w*h/(w*h + c_w*(c_h-h)) else: #means both w,h are bigger than c_w and c_h respectively similarity = (c_w*c_h)/(w*h) similarities.append(similarity) # will become (k,) shape return np.array(similarities) def avg_IOU(X,centroids): n,d = X.shape sum = 0. for i in range(X.shape[0]): #note IOU() will return array which contains IoU for each centroid and X[i] // slightly ineffective, but I am too lazy sum+= max(IOU(X[i],centroids)) return sum/ndef write_anchors_to_file(centroids,X,anchor_file): f = open(anchor_file,'w') anchors = centroids.copy() print(anchors.shape) for i in range(anchors.shape[0]): anchors[i][0]*=width_in_cfg_file/32. anchors[i][1]*=height_in_cfg_file/32. widths = anchors[:,0] sorted_indices = np.argsort(widths) print('Anchors = ', anchors[sorted_indices]) for i in sorted_indices[:-1]: f.write('%0.2f,%0.2f, '%(anchors[i,0],anchors[i,1])) #there should not be comma after last anchor, that's why f.write('%0.2f,%0.2fn'%(anchors[sorted_indices[-1:],0],anchors[sorted_indices[-1:],1])) f.write('%fn'%(avg_IOU(X,centroids))) print()def kmeans(X,centroids,eps,anchor_file): N = X.shape[0] iterations = 0 k,dim = centroids.shape prev_assignments = np.ones(N)*(-1) iter = 0 old_D = np.zeros((N,k)) while True: D = [] iter+=1 for i in range(N): d = 1 - IOU(X[i],centroids) D.append(d) D = np.array(D) # D.shape = (N,k) print("iter {}: dists = {}".format(iter,np.sum(np.abs(old_D-D)))) #assign samples to centroids assignments = np.argmin(D,axis=1) if (assignments == prev_assignments).all() : print("Centroids = ",centroids) write_anchors_to_file(centroids,X,anchor_file) return #calculate new centroids centroid_sums=np.zeros((k,dim),np.float) for i in range(N): centroid_sums[assignments[i]自觉的大船=X[i] for j in range(k): centroids[j] = centroid_sums[j]/(np.sum(assignments==j)) prev_assignments = assignments.copy() old_D = D.copy() def main(argv): parser = argparse.ArgumentParser() parser.add_argument('-filelist', default = '\path\to\voc\filelist\train.txt', help='path to filelistn' ) parser.add_argument('-output_dir', default = 'generated_anchors/anchors', type = str, help='Output anchor directoryn' ) parser.add_argument('-num_clusters', default = 0, type = int, help='number of clustersn' ) args = parser.parse_args() if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) f = open(args.filelist) lines = [line.rstrip('n') for line in f.readlines()] annotation_dims = [] size = np.zeros((1,1,3)) for line in lines: #line = line.replace('images','labels') #line = line.replace('img1','labels') line = line.replace('JPEGImages','labels') line = line.replace('.jpg','.txt') line = line.replace('.png','.txt') print(line) f2 = open(line) for line in f2.readlines(): line = line.rstrip('n') w,h = line.split(' ')[3:] #print(w,h) annotation_dims.append(tuple(map(float,(w,h)))) annotation_dims = np.array(annotation_dims) eps = 0.005 if args.num_clusters == 0: for num_clusters in range(1,11): #we make 1 through 10 clusters anchor_file = join( args.output_dir,'anchors%d.txt'%(num_clusters)) indices = [ random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)] centroids = annotation_dims[indices] kmeans(annotation_dims,centroids,eps,anchor_file) print('centroids.shape', centroids.shape) else: anchor_file = join( args.output_dir,'anchors%d.txt'%(args.num_clusters)) indices = [ random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)] centroids = annotation_dims[indices] kmeans(annotation_dims,centroids,eps,anchor_file) print('centroids.shape', centroids.shape)if __name__=="__main__": main(sys.argv)

版权声明:该文观点仅代表作者本人。处理文章:请发送邮件至 三1五14八八95#扣扣.com 举报,一经查实,本站将立刻删除。