import pandas as pd
import numpy as np
from datetime import date
from datetime import datetime
Individual months are separated by month for user entry
Sheets also contain summary data (These rows do not begin with a date)
df = pd.concat(pd.read_excel('Packaging 2022_raw.xlsx',sheet_name=None),ignore_index=True)
# This funtion will evaluate rows in the spreadsheet and replace 'Date' field with NaN if not a production entry
def to_na(date):
if type(date) == datetime:
return date
else:
return np.nan
df['Date'] = df['Date'].apply(to_na)
*As determined by valid date in 'Date' column
s = pd.to_datetime(df.Date, format='YYYY-mm-dd', errors='coerce')
df = df[s.notna()]
df.columns
['Date', 'Brand', 'Batch #', 'Tank', "Starting BBL's", 'ZAHM', 'FH 1/2\nKeg', 'MS 1/2\nKeg', 'FH 1/6\nKeg', 'MS 1/6\nKeg', '24 pk 12oz bottle', '12pk 12oz bottle', 'Loose 12 oz bottle', '24pk Lidl bottle', '24pk mixer bottle', '22 oz case bottle', '15 pk 12oz cans', '24 pk 12oz cans', '12pk 12oz cans', '24pk mixer can', '15pk 19.2oz cans', '24pk 19.2oz cans', 'Lowfills 12 oz bottle', 'Lowfills 22 oz bottle', 'Lowfills 19.2 oz can', 'Lowfills 12 oz \ncan', 'BBL Packaged', 'BBL Remain', 'BBL Loss', '% Loss BBT', '% Loss Run', '% loss 12 oz bottle', '% loss 22 oz bottle', '% loss 12 oz can', '% loss 19.2 oz can', '%', '24pk 12oz mixer', 'Mixer 12oz cans']
cols_to_drop = ['Lowfills 12 oz bottle',
'Lowfills 22 oz bottle',
'Lowfills 19.2 oz can',
'Lowfills 12 oz \ncan','%',
'24pk 12oz mixer',
'Mixer 12oz cans']
df.drop(cols_to_drop, axis=1, inplace = True)
df.to_excel('Packaging_2022_cleaned.xlsx',sheet_name='Sheet1',index=False)