大数据竞赛模拟赛题3


商品销售案例

数据清洗

import matplotlib.pyplot as plt
import pandas as pd

# Load the raw sales records from the Excel workbook.
data = pd.read_excel('./商品销售数据.xlsx')

# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit an extra "None".
data.info()

# Drop rows whose 【用户 ID】 column is missing.
data = data.dropna(subset=['用户 ID'])
data.info()

# Show the rows flagged as duplicates before removing them.
print(data[data.duplicated()])
# Remove the duplicated rows.
data = data.drop_duplicates()
# Check again: this should now print an empty frame.
print(data[data.duplicated()])

print(data.describe())
# Keep only the rows whose 【数量】 value is greater than 0.
data = data[data['数量'] > 0]
print(data.describe())

数据分析与图表展示

# NOTE(review): `mask_data_clean` is not defined in the visible part of this
# file; the code below reads its 月份, 销售额, 订单量, 单价 and 省 columns.
# Confirm it is created before this section runs.
# NOTE(review): the chart titles contain CJK text; if they render as empty
# boxes, a CJK-capable matplotlib font probably needs to be configured.


def _plot_trend(values, title):
    """Draw *values* as a line chart on its own new 7x7-inch figure.

    An explicit Axes is passed because, without one, successive
    ``Series.plot`` calls in a script are drawn onto the current axes and
    end up overlaid on a single chart with only the last title.
    """
    _, ax = plt.subplots(figsize=(7, 7))
    values.plot(kind='line', ax=ax, title=title)
    return ax


# Total sales revenue per month.
sales_income = mask_data_clean.groupby('月份')['销售额'].sum()
_plot_trend(sales_income, '各月总销售额趋势图')

# Total order volume per month.
order_number = mask_data_clean.groupby('月份')['订单量'].sum()
_plot_trend(order_number, '各月总订单量趋势图')

# Mean unit price per month.
month_price = mask_data_clean.groupby('月份')['单价'].mean()
_plot_trend(month_price, '各月平均单价趋势图')

# Total order volume grouped by (province, month); after unstack() the
# provinces form the index and each month becomes one column, i.e. one line.
month_order1 = mask_data_clean.groupby(['省', '月份'])['订单量'].sum()
month_order1_df = month_order1.unstack()
_plot_trend(month_order1_df, '各月各省总订单量趋势图')

# Same totals grouped by (month, province); after unstack() the months form
# the index and each province becomes one column, i.e. one line.
month_order2 = mask_data_clean.groupby(['月份', '省'])['订单量'].sum()
month_order2_df = month_order2.unstack()
_plot_trend(month_order2_df, '各省各月总订单量趋势图')

print(month_order1)

plt.show()

文章作者: 彭韦浩
版权声明: 本博客所有文章除特别声明外,均采用 CC BY 4.0 许可协议。转载请注明来源:彭韦浩!
  目录