# Listing metadata: 51 lines, 1.9 KiB, Python
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_iris

# Load the Iris dataset into a DataFrame with one column per feature, label
# every row with its species name, and average each feature within a species.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['Species'] = iris.target_names[iris.target]  # integer class codes -> names
df_mean = df.groupby('Species', as_index=False).mean()

# Reshape the per-species means to long format: one row per
# (Species, Measurement) pair, with the mean stored in 'Average'.
df_melt = df_mean.melt(
    id_vars='Species',
    var_name='Measurement',
    value_name='Average',
)

# Visualisation parameters.
# Feature columns to chart, in left-to-right group order.
measurements = [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]
# Species names in df_mean row order; each species gets one bar per group.
species = list(df_mean['Species'])
bar_width = 0.25
# One colour per species (intended as a professional, colour-blind-friendly palette).
colors = ['#4e79a7', '#f28e2b', '#e15759']

# Base x position of each measurement group.
x = range(len(measurements))

# Create the figure that the subsequent pyplot calls draw on.
plt.figure(figsize=(14, 8), dpi=100)

# Draw the grouped bar chart: one bar series per species, each shifted right
# by one bar width so the species sit side by side within a measurement group.
for species_idx, species_name in enumerate(species):
    positions = [base + bar_width * species_idx for base in x]
    # Read the heights from df_mean in the order given by `measurements`
    # (`species` is df_mean's 'Species' column, so row position == species_idx).
    # This keeps bar heights aligned with the tick labels and the data labels
    # instead of relying on the row order of df_melt.
    heights = df_mean[measurements].iloc[species_idx]
    plt.bar(
        positions,
        heights,
        width=bar_width,
        color=colors[species_idx % len(colors)],  # cycle if species outnumber colours
        label=species_name,
        edgecolor='white',
        linewidth=0.5,
    )

# Add a value label (one decimal place) just above every bar.
for base, measurement in zip(x, measurements):
    # df_mean has one row per species in plotting order, so the row position
    # is also the bar's offset index within the measurement group.
    for species_idx, value in enumerate(df_mean[measurement]):
        plt.text(
            base + bar_width * species_idx,
            value + 0.05,  # small gap between the bar top and its label
            f'{value:.1f}',
            ha='center',
            va='bottom',
            fontsize=9,
        )

# Chart decoration: title, axis labels, ticks, limits, grid and legend.
plt.title('Average Sepal and Petal Dimensions by Iris Species', pad=20, fontsize=14, fontweight='bold')
plt.xlabel('Measurement', labelpad=15, fontsize=12)
plt.ylabel('Average Dimension (cm)', labelpad=15, fontsize=12)

# Centre each tick under its group of bars. Bar centres within a group span
# bar_width * (len(species) - 1), so the midpoint is half of that; this equals
# the previous fixed offset of one bar width when there are three species.
group_center = bar_width * (len(species) - 1) / 2
plt.xticks(
    [base + group_center for base in x],
    [name.replace(' (cm)', '').title() for name in measurements],
    rotation=45,
    ha='right',
)
# Extend the y axis to 1.3x the largest mean, leaving headroom above the bars.
plt.ylim(0, df_melt['Average'].max() * 1.3)
plt.grid(axis='y', linestyle='--', alpha=0.4)
# Place the legend's upper-left corner just beyond the right edge of the axes.
plt.legend(title='Species', bbox_to_anchor=(1.02, 1), loc='upper left')

# Tidy the layout, then display the figure.
plt.tight_layout()
plt.show()