I have just completed this python script. It works the way I want it to, but as I am new to this, I would appreciate if any more experienced programmers might have a look at my code and let me know what they think? Is it efficient? Have I gone about it in a completely daft manner? What would you do differently etc…
##Design Spec
*The script should be scheduled to run every day just after midnight
*it checks to see if a csv file for that iso_week already exists in the local directory
*if it does then it waits 24 hours and checks again
*if it doesn’t, then it loads in the previous week’s file and the current data from a SQL Server database
*It compares the current data to the previous data, if it was there before then it assigns the status of “existing”
*If it’s new it gets status “new” and if it was in the previous file and not the current data it gets “now required”
*This data is appended to a list of dictionaries, which in turn becomes a dataframe
*The dataframe gets exported to a csv that will be used as the basis for comparison next week
*it also gets exported to an xlsx file that is the actual output for the user
##Code
##############################################################################################
# Module to create a weekly report showing Zero Required Stock and highlight changes #
##############################################################################################
##############################################################################################
# Import required python modules to gather and manipulate the data #
##############################################################################################
import pandas as pd
import time
from datetime import datetime, timedelta
import pyodbc
from pathlib import Path
import os
import schedule
##############################################################################################
# Establish a connection to Emax that can be called later #
##############################################################################################
emax = pyodbc.connect('Driver={SQL Server};'
'Server=cvc-sql\emax;'
'Clark Live;'
'Trusted_Connection=yes;'
'UID=sa;'
'PWD=sqllogin1;')
##############################################################################################
# Write SQL queries to strings so they can be called later #
##############################################################################################
#This query selects any part in the Part_list table that is in stock and the requirement is 0
#It filters out non-stock items or irrelevant items such as KAN and RAW
#The result is a table with headers: Part No, Description, In Stock, Unit Cost, Total Value,
#Type (i.e. Purchased or In-House), Category (i.e. Stock To Requirement / Can Hold etc.)
zero_requirement_stock = '''
SELECT [Part No]
,[Description]
,[In Stock]
,[Cost] AS 'Unit Cost'
,[In Stock] * [Cost] AS 'Total Value'
,CAST(CASE
WHEN [Description] LIKE 'MAN %' THEN 'In-House'
ELSE 'Purchased'
END AS varchar) AS 'Type'
,[Group Code] AS 'Category'
FROM [Clark Live].[dbo].[Part_list]
WHERE [Required] = 0
AND [In Stock] > 0
AND [Part No] NOT LIKE '%KAN%'
AND [Part No] NOT LIKE 'RAW%'
AND [Part No] NOT LIKE 'ADAPT%'
AND [Part No] NOT LIKE 'GM%'
AND [Part No] NOT LIKE 'TAS%'
AND [Part No] NOT LIKE 'UNI%'
AND [Part No] NOT LIKE 'PC%'
AND [Part No] NOT LIKE 'SER%'
AND [Part No] NOT LIKE 'SUNDRIES%'
ORDER BY [Part No]
'''
##############################################################################################
# Function Starts Here to be called on a schedule so data continually updates #
##############################################################################################
def refresh_data():
#Establish Today's Date and save to a variable
today = datetime.strptime(datetime.strftime(datetime.now(), "%d %b %y"), "%d %b %y")
#Reference a csv file for this week in the local directory
pathname = 'Zero_module_working ' + str(today.year) + '-' + str(today.isocalendar()[1]).zfill(2) + '.csv'
excel_path = 'Zero Requirement Comparison ' + str(today.year) + '-' + str(today.isocalendar()[1]).zfill(2) + '.xlsx'
path = Path('Zero_module_working ' + str(today.year) + '-' + str(today.isocalendar()[1]).zfill(2) + '.csv')
#print(path)
#Establish the week number and year for the previous week's file
last_week = str((datetime.today() - timedelta(days=7)).isocalendar()[1]).zfill(2)
last_week_year = str((datetime.today() - timedelta(days=7)).year)
#Logic laid down to establish if the process needs to run.
#If a csv file exists for this week then the program waits for the next week.
#If the file does not exist, then the rest of the program runs to create it.
if(path.is_file() == False): #If this week's report does not exist...
current = pd.read_sql_query(zero_requirement_stock, emax) #Load current zero required stock into a dataframe
previous = pd.read_csv('Zero_module_working ' + last_week_year + '-' + last_week + '.csv') #Load last week's file to a dataframe
list_of_new = [] #Initiate a blank list to contain dictionaries that will become today's report
counter = 0 #initiate a counter at zero to count the records that have been checked
while counter < len(current.index): #Until all current records have been checked...
check_for = current['Part No'].iloc[counter] #Identify part number to check from current report.
counter2 = 0 #initiate a second counter at zero to count how many previous lines, the current line has been compared to.
finder = '' #initiate a blank string variable to be set if the checked records match.
while counter2 < len(previous.index): #until the current line has been compared to all previous lines
check_against = previous['Part No'].iloc[counter2] #Identify previous part number to compare to current
if(check_for == check_against): #If they match...
finder = 'found' #Set the finder variable to found
break #Stop searching
counter2 = counter2 + 1 #Increase the previous record counter so loop looks at the next line
if (finder != 'found'): #If the record wasn't found, it must be a new one, so...
record_to_add = {
'Part No': current['Part No'].iloc[counter],
'Description': current['Description'].iloc[counter],
'In Stock': current['In Stock'].iloc[counter],
'Unit Cost': current['Unit Cost'].iloc[counter],
'Total Value': current['Total Value'].iloc[counter],
'Type': current['Type'].iloc[counter],
'Category': current['Category'].iloc[counter],
'Status': 'NEW'
}
list_of_new.append(record_to_add)
#Assign the status 'NEW' and add the line to the list
else: #If the record was found, it is an existing one, so...
record_to_add = {
'Part No': current['Part No'].iloc[counter],
'Description': current['Description'].iloc[counter],
'In Stock': current['In Stock'].iloc[counter],
'Unit Cost': current['Unit Cost'].iloc[counter],
'Total Value': current['Total Value'].iloc[counter],
'Type': current['Type'].iloc[counter],
'Category': current['Category'].iloc[counter],
'Status': 'EXISTING'
}
list_of_new.append(record_to_add)
#Assign the status 'EXISTING' and add the line to the list
counter = counter + 1 # Increase the counter so the loop now looks at the next line in the current dataframe
counter = 0 #initiate a counter at zero to count the records that have been checked
###################################################################################################
while counter < len(previous.index): #Until all previous records have been checked...
check_for = previous['Part No'].iloc[counter] #Identify part number to check from previous report.
counter2 = 0 #initiate a second counter at zero to count how many current lines, the previous line has been compared to.
finder = '' #initiate a blank string variable to be set if the checked records match.
while counter2 < len(current.index): #until the previous line has been compared to all current lines
check_against = current['Part No'].iloc[counter2] #Identify current part number to compare to previous
if(check_for == check_against): #If they match...
finder = 'found' #Set the finder variable to found
break #Stop searching
counter2 = counter2 + 1 #Increase the current record counter so loop looks at the next line
if (finder != 'found'): #If the record wasn't found, it must be required now, so...
record_to_add = {
'Part No': current['Part No'].iloc[counter],
'Description': current['Description'].iloc[counter],
'In Stock': current['In Stock'].iloc[counter],
'Unit Cost': current['Unit Cost'].iloc[counter],
'Total Value': current['Total Value'].iloc[counter],
'Type': current['Type'].iloc[counter],
'Category': current['Category'].iloc[counter],
'Status': 'NOW REQUIRED'
}
list_of_new.append(record_to_add)
#Assign the status 'NOW REQUIRED' and add the line to the list
counter = counter + 1 # Increase the counter so the loop now looks at the next line in the current dataframe
new_report = pd.DataFrame(list_of_new)
print('New Records Found')
print(new_report[new_report['Status'] == 'NEW'])
print('')
print(new_report[new_report['Status'] == 'NOW REQUIRED'])
new_report.to_csv(pathname, index=False)
new_report.to_excel(excel_path, index=False)
else:
print("SCRIPT EXECUTED ON " + str(today) + ", REPORT FOR THIS WEEK ALREADY EXISTS...")
print("Waiting 24 hours to cycle again...")
print("...")
print("...")
print("...")
##############################################################################################
# Function Ends Here #
##############################################################################################
##############################################################################################
# Setup a schedule to run the function every 24 hours #
##############################################################################################
###### WHEN DEBUGGING CODE, COMMENT OUT THE BELOW SECTION DOWN TO THE HASH MARKS ######
###### AND UNCOMMENT THE FUNCTION CALL BELOW IT, REVERSE THIS WHEN FINISHED ######
while True:
try:
refresh_data()
schedule.every().day.at("00:01").do(refresh_data)
while 1:
schedule.run_pending()
time.sleep(1)
except KeyboardInterrupt:
print('Script interrupted')
break
except Exception as e:
print(e)
continue
##############################################################################################
#refresh_data()