python 正态分布检验,python实现正态分布

最近刚入门python，欢迎大家多多指教！
最近接到老师的一个项目，要求对一些医疗数据进行分析

现在编写一个allfunction.py函数文件，使得获取excel的数据后，可以对数据进行数据分析。

最近刚入门python，欢迎大家多多指教！
最近接到老师的一个项目，要求对一些医疗数据进行分析

现在编写一个allfunction.py函数文件，使得获取excel的数据后，可以对数据进行数据分析。

from datetime import date, datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xlrd
import xlwt
from pandas.core.frame import DataFrame
from scipy import stats
from xlrd import open_workbook
from xlutils.copy import copy


def drawing(list, drawfilename):
    """Save a histogram with a KDE overlay for a rough visual normality check.

    Args:
        list: Sequence of numeric values to plot. (The name shadows the
            builtin; it is kept so existing keyword callers keep working.)
        drawfilename: Prefix of the output image path; the fixed suffix
            "描绘正态分布直方图.png" is appended to it.

    Side effects:
        Prints the intermediate DataFrame and writes a PNG file.
    """
    datas = DataFrame({"HR_SCORE_CHANGE": list})
    print(datas)

    fig = plt.figure(figsize=(10, 6))
    ax2 = fig.add_subplot(1, 1, 1)
    # Histogram on the primary axis, density estimate on a secondary y-axis
    # so the two differently scaled curves can share one plot.
    datas.hist(bins=50, ax=ax2)
    datas.plot(kind='kde', secondary_y=True, ax=ax2)
    plt.grid()
    plt.savefig(drawfilename + "描绘正态分布直方图.png")

    # Reset and release the figure so repeated calls do not accumulate
    # open figures.
    plt.cla()
    plt.clf()
    plt.close()


def analyselist(list, string):
    """Append summary statistics and a KS normality verdict to the results workbook.

    The function reads "Summary of analysis results.xls" (it must already
    exist), appends a block of rows after the existing ones, and saves the
    workbook back over the same file.

    Args:
        list: Sequence of numeric values to analyse. (The name shadows the
            builtin; it is kept for interface compatibility.)
        string: Heading written into a merged cell above the statistics.

    Layout of the appended block, relative to the current row count ``n``:
        row n + 1: heading (columns 0-6 merged)
        row n + 2: column titles
        row n + 3: max, min, mean, variance, std, coefficient of variation,
                   p-value
        row n + 4: textual verdict (columns 0-6 merged)
    """
    workbook_path = "Summary of analysis results.xls"

    # Kolmogorov-Smirnov test against a normal distribution parameterised by
    # the sample mean and sample standard deviation. result[0] is the D
    # statistic and result[1] the p-value; p > 0.05 is treated as "normal".
    # NOTE(review): the distribution parameters are estimated from the same
    # sample that is tested - confirm this is acceptable for the analysis.
    df = pd.DataFrame(list, columns=['value'])
    u = df['value'].mean()
    std = df['value'].std()
    result = stats.kstest(df['value'], 'norm', (u, std))

    r_xls = open_workbook(workbook_path)
    rows_old = r_xls.sheets()[0].nrows  # number of rows already present
    excel = copy(r_xls)  # convert the read-only xlrd book into a writable xlwt book
    table = excel.get_sheet(0)

    heading_row = rows_old + 1
    title_row = rows_old + 2
    value_row = rows_old + 3
    verdict_row = rows_old + 4

    table.write_merge(heading_row, heading_row, 0, 6, string)

    title = ["最大值", "最小值", "平均值", "方差", "标准差", "变异系数", "pvalue"]
    for col, label in enumerate(title):
        table.write(title_row, col, label)

    # Variance and std use numpy's default ddof=0; the coefficient of
    # variation uses the ddof=1 standard deviation, as in the original code.
    table.write(value_row, 0, str(np.max(list)))
    table.write(value_row, 1, str(np.min(list)))
    table.write(value_row, 2, str(format(np.mean(list), '.4f')))
    table.write(value_row, 3, str(format(np.var(list), '.4f')))
    table.write(value_row, 4, str(format(np.std(list), '.4f')))
    table.write(value_row, 5, str(format((np.std(list, ddof=1) / np.mean(list)), '.4f')))
    table.write(value_row, 6, result[1])

    if float(result[1]) > 0.05:
        verdict = "输出结果pvalue>0.05，接受其符合正态分布的假设！"
    else:
        verdict = "输出结果pvalue<0.05，不接受其符合正态分布的假设！"
    # Both verdicts go to the same appended row; the original wrote the
    # rejection message to the fixed row 4 regardless of existing content.
    table.write_merge(verdict_row, verdict_row, 0, 6, verdict)

    excel.save(workbook_path)  # save, overwriting the original file

结果如下：

代码借鉴了很多 CSDN 博主和博客园上发表的文章，涉及较多，没有一一记录，在这里向他们表示感谢。初学 Python，很多东西还不是特别熟练，欢迎大家多多交流。
其实使用pandas写入excel更为方便
例如：要使用 pandas 筛选出客户姓名以大写字母 J 开头的那些行，可以这样写：

#!/usr/bin/env python3
"""Copy the rows whose customer name starts with "J" into a new workbook.

Usage: script.py INPUT_XLSX OUTPUT_XLSX

Reads the 'january_2013' sheet of the input workbook and writes the
matching rows to a sheet named 'jan_13_output' in the output workbook.
"""
import sys

import pandas as pd

input_file = sys.argv[1]
output_file = sys.argv[2]

data_frame = pd.read_excel(input_file, sheet_name='january_2013', index_col=None)

# na=False makes rows with a missing customer name count as non-matching;
# without it the mask contains NaN and boolean indexing raises.
starts_with_j = data_frame['Customer Name'].str.startswith("J", na=False)
data_frame_value_matches_pattern = data_frame[starts_with_j]

# The context manager saves and closes the file on exit; ExcelWriter.save()
# is no longer available in pandas 2.x.
with pd.ExcelWriter(output_file) as writer:
    data_frame_value_matches_pattern.to_excel(
        writer, sheet_name='jan_13_output', index=False
    )