# 口罩厂亏损案例 (Mask factory loss case study)
# 数据清洗 (Data cleaning)
import matplotlib.pyplot as plt
import pandas as pd
# Load the raw order records and show column dtypes / non-null counts.
mask_data = pd.read_csv('./工作/mask_data.csv', encoding='utf-8')
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
mask_data.info()

# Drop every row that has a missing value in any column.
mask_data = mask_data.dropna()
mask_data.info()

# Kept from the original walkthrough: after the full dropna() above there is
# no NaN left in '订单量', so this call removes nothing.
mask_data = mask_data.dropna(subset=['订单量'])
mask_data.info()

# Inspect fully duplicated rows, remove them, then confirm none remain
# (the last print should show an empty frame).
print(mask_data.duplicated())
print(mask_data[mask_data.duplicated()])
mask_data = mask_data.drop_duplicates()
print(mask_data[mask_data.duplicated()])

# Keep only rows whose '单价' is at most 200 (presumably an outlier cut-off;
# the threshold comes from the original script -- confirm with the data owner).
# .copy() makes the result an independent frame, so adding the '月份' column
# below does not trigger SettingWithCopyWarning.
mask_data = mask_data[mask_data['单价'] <= 200].copy()
print(mask_data.describe())

# Parse '日期' (expected as YYYY-MM-DD) and derive the month number.
# Only the month (1-12) is kept; the year is not carried into '月份'.
date_data = pd.to_datetime(mask_data['日期'], format='%Y-%m-%d')
print(date_data)
month_data = date_data.dt.month
print(month_data)
mask_data['月份'] = month_data
print(mask_data)

# Persist the cleaned data and reload it from disk; the analysis that follows
# works on the reloaded frame `mask_data_clean`.
mask_data.to_csv('mask_data_clean.csv', encoding='utf-8', index=False)
mask_data_clean = pd.read_csv('mask_data_clean.csv', encoding='utf-8')
print(mask_data_clean)
# 数据分析与图表展示 (Data analysis and chart display)
def _plot_trend(data, title):
    """Draw *data* as a line chart on its own 7x7 inch figure.

    A fresh figure/axes pair is created for every chart. Without it,
    ``Series.plot`` draws onto the current axes whenever a figure already
    exists, so consecutive Series plots in a plain script end up overlaid on
    one chart and only the last title survives.

    Args:
        data: pandas Series or DataFrame to plot (index on the x-axis).
        title: Title shown above the chart.

    Returns:
        The matplotlib Axes the chart was drawn on.
    """
    _, ax = plt.subplots(figsize=(7, 7))
    data.plot(kind='line', ax=ax, title=title)
    return ax


# NOTE(review): the chart titles are Chinese; if they render as empty boxes,
# a CJK-capable font has to be configured for matplotlib.

# Monthly totals / averages over all provinces.
sales_income = mask_data_clean.groupby('月份')['销售额'].sum()
_plot_trend(sales_income, '各月总销售额趋势图')

order_number = mask_data_clean.groupby('月份')['订单量'].sum()
_plot_trend(order_number, '各月总订单量趋势图')

month_price = mask_data_clean.groupby('月份')['单价'].mean()
_plot_trend(month_price, '各月平均单价趋势图')

# Order volume by province and month. Grouping by ['省', '月份'] and unstacking
# puts provinces on the x-axis with one line per month.
month_order1 = mask_data_clean.groupby(['省', '月份'])['订单量'].sum()
month_order1_df = month_order1.unstack()
_plot_trend(month_order1_df, '各月各省总订单量趋势图')

# Same totals grouped by ['月份', '省']: months on the x-axis, one line per
# province.
month_order2 = mask_data_clean.groupby(['月份', '省'])['订单量'].sum()
month_order2_df = month_order2.unstack()
_plot_trend(month_order2_df, '各省各月总订单量趋势图')

print(month_order1)
# Display all figures created above.
plt.show()