Import data libraries¶

In [1]:
import pandas as pd
import numpy as np
from datetime import date
from datetime import datetime

Read excel file into pandas dataframe¶

Individual months are separated by month for user entry

Sheets also contain summary data (These rows do not begin with a date)
In [16]:
df = pd.concat(pd.read_excel('Packaging 2022_raw.xlsx',sheet_name=None),ignore_index=True)
In [3]:
# This funtion will evaluate rows in the spreadsheet and replace 'Date' field with NaN if not a production entry

def to_na(date):
    if type(date) == datetime:
        return date
    else:
        return np.nan
In [4]:
df['Date'] = df['Date'].apply(to_na)

Remove all rows that are not production entries¶

*As determined by valid date in 'Date' column
In [5]:
s = pd.to_datetime(df.Date, format='YYYY-mm-dd', errors='coerce')
In [6]:
df = df[s.notna()]

Drop columns not in Power BI dataset to match format¶

In [11]:
df.columns
Out[11]:
['Date',
 'Brand',
 'Batch #',
 'Tank',
 "Starting BBL's",
 'ZAHM',
 'FH 1/2\nKeg',
 'MS 1/2\nKeg',
 'FH 1/6\nKeg',
 'MS 1/6\nKeg',
 '24 pk 12oz bottle',
 '12pk 12oz bottle',
 'Loose 12 oz bottle',
 '24pk Lidl bottle',
 '24pk mixer bottle',
 '22 oz case bottle',
 '15 pk 12oz cans',
 '24 pk 12oz cans',
 '12pk 12oz cans',
 '24pk mixer can',
 '15pk 19.2oz cans',
 '24pk 19.2oz cans',
 'Lowfills 12 oz bottle',
 'Lowfills 22 oz bottle',
 'Lowfills 19.2 oz can',
 'Lowfills 12 oz \ncan',
 'BBL Packaged',
 'BBL Remain',
 'BBL Loss',
 '% Loss BBT',
 '% Loss Run',
 '% loss 12 oz bottle',
 '% loss 22 oz bottle',
 '% loss 12 oz can',
 '% loss 19.2 oz can',
 '%',
 '24pk 12oz mixer',
 'Mixer 12oz cans']
In [12]:
cols_to_drop = ['Lowfills 12 oz bottle',
 'Lowfills 22 oz bottle',
 'Lowfills 19.2 oz can',
 'Lowfills 12 oz \ncan','%',
 '24pk 12oz mixer',
 'Mixer 12oz cans']
In [13]:
df.drop(cols_to_drop, axis=1, inplace = True)

Export dataframe to excel file for Power BI report¶

In [15]:
df.to_excel('Packaging_2022_cleaned.xlsx',sheet_name='Sheet1',index=False)
In [ ]: