import pandas as pd
import numpy as np
from datetime import date
from datetime import datetime


df = pd.concat(pd.read_excel('Packaging 2022_raw.xlsx',sheet_name=None),ignore_index=True)


# This funtion will evaluate rows in the spreadsheet and replace 'Date' field with NaN if not a production entry

def to_na(date):
    if type(date) == datetime:
        return date
    else:
        return np.nan


df['Date'] = df['Date'].apply(to_na)


s = pd.to_datetime(df.Date, format='YYYY-mm-dd', errors='coerce')


df = df[s.notna()]


df.columns

['Date',
 'Brand',
 'Batch #',
 'Tank',
 "Starting BBL's",
 'ZAHM',
 'FH 1/2\nKeg',
 'MS 1/2\nKeg',
 'FH 1/6\nKeg',
 'MS 1/6\nKeg',
 '24 pk 12oz bottle',
 '12pk 12oz bottle',
 'Loose 12 oz bottle',
 '24pk Lidl bottle',
 '24pk mixer bottle',
 '22 oz case bottle',
 '15 pk 12oz cans',
 '24 pk 12oz cans',
 '12pk 12oz cans',
 '24pk mixer can',
 '15pk 19.2oz cans',
 '24pk 19.2oz cans',
 'Lowfills 12 oz bottle',
 'Lowfills 22 oz bottle',
 'Lowfills 19.2 oz can',
 'Lowfills 12 oz \ncan',
 'BBL Packaged',
 'BBL Remain',
 'BBL Loss',
 '% Loss BBT',
 '% Loss Run',
 '% loss 12 oz bottle',
 '% loss 22 oz bottle',
 '% loss 12 oz can',
 '% loss 19.2 oz can',
 '%',
 '24pk 12oz mixer',
 'Mixer 12oz cans']


cols_to_drop = ['Lowfills 12 oz bottle',
 'Lowfills 22 oz bottle',
 'Lowfills 19.2 oz can',
 'Lowfills 12 oz \ncan','%',
 '24pk 12oz mixer',
 'Mixer 12oz cans']


df.drop(cols_to_drop, axis=1, inplace = True)


df.to_excel('Packaging_2022_cleaned.xlsx',sheet_name='Sheet1',index=False)

Import data libraries¶

Read excel file into pandas dataframe¶

Remove all rows that are not production entries¶

Drop columns not in Power BI dataset to match format¶

Export dataframe to excel file for Power BI report¶