1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
| def filter_df_date(df, year, start_month, end_month):
df = df[df['Date'].dt.year == year]
df = df[(df['Date'].dt.month >= start_month) & (df['Date'].dt.month <= end_month)]
return df
def ts():
return pd.Timestamp.now().strftime('%Y_%m_%d_%H:%M:%S')
# export output utility function
def ts_export(output, name):
now = pd.Timestamp.now().strftime('%Y_%m_%d_%H:%M:%S')
output.to_csv(f'../../analysis/{name}_{now}.csv')
print('exported the output to csv...')
print('filepath')
print(f'../analysis/{name}_{now}.csv')
# converts $ column into number
## column is a string
def convert_to_number(df, column):
df[column] = df[column].str.replace('$','').str.replace(',','').astype(float)
## end covert to number code block
# # iterrows example from /sales/invoiced/legacy...
def margin_calculator(df):
for i, row in df.iterrows():
value = row["Rate"] - row["avg-cost"]
df.loc[i, "margin"] = value
print("margin calculated...")
# d = {'Rate': [10, 20, 5], 'avg-cost': [3.5, 4, 15]}
# df = pd.DataFrame(data=d)
# margin_calculator(df)
# df
export = False
# export = True
if export == True:
output.to_csv(f'../analysis/{name}_{now}.csv')
print('exported the output to csv...')
else:
print('no export')
print('filepath')
print(f'../analysis/{name}_{now}.csv')
# output.dtypes
# output.shape
# output.columns
# output.loc[9]
# output.head(30)
# output.head(3)
# output
# output = find_customer_invoices(invoices_df, cust_list)
# output.head(3)
# # scan two lists
# output = nscanner(ns_list, new_list)
#begin customer scanner program 'nscanner'
# given a list of customer names this function will return fuzzy matches and includes options to remove selected Terms
# dependencies: pandas, fuzzywuzzy
def nscanner(ns_list, new_list):
slim_leads=[]
slim_custs=[]
lead_list=[]
cust_list=[]
for lead in new_list:
# business commons
# commons = ['Inc.','Inc','Associates','Corporation', 'Services', 'Technologies','Technology','Computer', 'Computers', 'Communications', 'International', 'Solutions']
# school commons
# commons = ['Regional','County','Public','Schools','District','United','School','Community']
commons=[]
slim_lead = lead
try:
for x in commons:
slim_lead = slim_lead.replace(x,'').strip()
except:
print("lead", lead)
for cust in ns_list:
slim_cust = cust
try:
for x in commons:
slim_cust= slim_cust.replace(x,'').strip()
except:
print("cust",cust)
try:
if fuzz.ratio(slim_lead, slim_cust) > 70:
slim_leads.append(slim_lead)
slim_custs.append(slim_cust)
lead_list.append(lead)
cust_list.append(cust)
except:
print("fuzzy",slim_lead, cust)
return pd.DataFrame({'slim_leads': slim_leads,'slim_custs':slim_custs,'leads': lead_list, 'ns customers': cust_list})
## end nscanner block
# begin find customer invoice block
# given invoices data frame and a list of customer ids
# this function will scan the dataframe for customers matching
# and return a new dataframe with the customers on the list
# # find all invoices by customer
# invoices_df = import_csv('/sales/find invoices by customer/input/invoices_(22-24)_10_23_24')
# cust_df = import_csv('/sales/find invoices by customer/input/kens_list')
# cust_list = list(cust_df['Internal ID'])
#
def find_customer_invoices(invoices_df, cust_id_list):
new_df = pd.DataFrame()
for cust_id in cust_id_list:
tmp_df = invoices_df[invoices_df['Customer Internal ID'] == cust_id]
new_df = pd.concat([new_df, tmp_df])
return new_df
## Linear Regression
import numpy as np
from scipy import stats
import pandas as pd
# Example data
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 5])
# Method 1: Using scipy.stats
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print(f"Slope: {slope}")
# Method 2: Using numpy's polyfit
slope_np = np.polyfit(x, y, 1)[0]
print(f"Slope (numpy): {slope_np}")
# Method 3: Using pandas and statsmodels
import statsmodels.api as sm
df = pd.DataFrame({'x': x, 'y': y})
model = sm.OLS(df['y'], sm.add_constant(df['x'])).fit()
slope_sm = model.params[1]
print(f"Slope (statsmodels): {slope_sm}")
|