Py.Cafe

marie-anne/

dash-ibcs-experiment-3

AC and LY revenue by employee in month K$

DocsPricing
  • app.py
  • requirements.txt
  • revenue_emp_m_ly.csv
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 20 15:00:14 2025

@author: marie-anne melis
"""

import dash as dash
from dash import dcc, html

import plotly.graph_objects as go
import pandas as pd
import dash_bootstrap_components as dbc
pd.options.plotting.backend = "plotly"



dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

#df = create_total_revenye_by_employee_month()
df =  pd.read_csv('revenue_emp_m_ly.csv')

#filter on employee id 1, nancy


colors = {'AC': 'rgba(64,64,64,1)','PY':'rgba(166,166,166,1)', 'red': 'rgba(255,0,0,1)', 'green':'rgba(0,142,150,1)', 'lightGrey': 'rgba(166,166,166,1)' }
options_employee_dict = dict(zip(df['employeeName'], df['employeeID']))


# custom function to set fill color
#https://stackoverflow.com/questions/64741015/plotly-how-to-color-the-fill-between-two-lines-based-on-a-condition

y_overall_max = df['revenue'].max() if df['revenue'].max() > df['revenue_ly'].max() else df['revenue_ly'].max()





def create_line_chart_employee(dff):
    
    #dff consists of columns, the ones used here are:
    #orderDate
    #orderDate_ym: date becomes something like 2025-07
    #revenue 
    #revenue_ly => revenue last year
    #employeeName => only used for the visual title, not
    #relevant for the intersection part, the incoming dataframe is already
    #filtered by employee and consists of 12 rows, 1 per month
    
    
    #get interesting values meaning, max, min, start and end current revenue
    
    ymax = dff['revenue'].max()
    xmax = dff.loc[dff['revenue'].idxmax(),'orderDate_ym']
    #get min revenue
    ymin = dff['revenue'].min()
    xmin = dff.loc[dff['revenue'].idxmin(),'orderDate_ym']
    #first month
    ystart = dff['revenue'].iloc[0]
    xstart =  dff['orderDate_ym'].iloc[0]
    #last month
    yend = dff['revenue'].iloc[-1]
    xend =  dff['orderDate_ym'].iloc[-1]
    
    #title equals name of employee
    employee_name = dff['employeeName'].iloc[0]
    
    #before we begin, a copy of the original data is necessery
    #to print the 2 linecharts as they are meant to be, not with 
    #intersections, the dff_org index will be adjusted to
    #the index used for the df with intersections (date), so taht
    #the x-axis can be shared.
    
    dff_org = dff.copy(deep=True)
    dff_org.index =  pd.to_datetime(dff_org.orderDate)
    #keep only relevant columns
    dff_org= dff_org[['revenue', 'revenue_ly']]
    
    #dff is the input dataframe with 12 datapoints, 1 for every month
    #to calculate the intersections, the index is set to date/time
    #and dff is stripped to the new index + revenue + revenue_ly
      
    
    dff.index =  pd.to_datetime(dff.orderDate)
    dff = dff[['revenue', 'revenue_ly']]
    
    #a new df, to create the intersections and join with calculated intersections in the end

    df1 = dff.copy()
    
    #initialize the column intersect and give all datapoints (12) the value none,
    #intersection points will get start/stop
    df1['intsect'] = 'none'
            
    # --- Step 1: Find Intersection Points and Round to Nearest Date Index ---
    intersections = []
    date_index = df1.index
    
    for i in range(len(df1) - 1):
        x1, x2 = date_index[i], date_index[i + 1]
        y1, y2 = df1.revenue.iloc[i], df1.revenue.iloc[i + 1]
        y1_ly, y2_ly = df1.revenue_ly.iloc[i], df1.revenue_ly.iloc[i + 1]
    
        # Check if the lines cross
        if (y1 > y1_ly and y2 < y2_ly) or (y1 < y1_ly and y2 > y2_ly):
            # Calculate slopes
            days_diff = (x2 - x1).days + 1e-9
            slope1 = (y2 - y1) / days_diff
            slope2 = (y2_ly - y1_ly) / days_diff
    
            # Solve for x where revenue = revenue_ly
            x_intersect_days = (y1_ly - y1) / (slope1 - slope2 + 1e-9)
            x_intersect = x1 + pd.Timedelta(days=x_intersect_days)
    
    
            # Set revenue_ly = revenue at the intersection point
            # This was actually something that went wrong with chatgpt
            # the whole time unless I explained that if the revenue 
            # intersection points were correct, but the revenue_ly
            # was incorrect, and revenue_ly equals revenue at the intersection
            # it might as well use the revenue instead of calculated wrong revenue_ly
            y_intersect = y1 + slope1 * x_intersect_days
    
            # Store intersection points, intersection points get a intsect value stop
            # the first en start, the second.
            # The idea is that an intersection point is the end of an area
            # and the start of a new one. The 10 minutes difference is for date index sake.
            intersections.append((x_intersect, y_intersect, y_intersect, 'stop'))
            #these 10 minutes are fake but needed because we're talking the index
            intersections.append((x_intersect + pd.Timedelta(minutes=10), y_intersect, y_intersect, 'start'))
            #intersections.append((closest_date, y_intersect, y_intersect, "start"))
    
    # Convert intersections to DataFrame with same columns as df1
    intersections_df = pd.DataFrame(intersections, columns=['date', 'revenue', 'revenue_ly','intsect'])
    intersections_df.set_index('date', inplace=True)
    
    # Merge intersection points into df1 (copy original with 12 datapoints and intersect value none)
    df1 = pd.concat([df1, intersections_df], ignore_index=False).sort_index()
   
    
    # labelvalue 1 equals better than last year, 0 ... You need to now the label of the
    # previous row then finish the definition of an area correct.
    df1['label'] = 0
    prev_label = 0
    
    
    
    for index, row in df1.iterrows():
        #processing normal row meaning one which belongs to an original datapoint.
        if row['intsect'] == 'none':
            df1.at[index,'label'] =  1 if row['revenue'] > row['revenue_ly'] else 0
            prev_label = df1.at[index,'label']
        #processing intersection, stop means end of area, start means new area switch label
        #meaning switch color
        if ((row['revenue'] == row['revenue_ly']) and (row['intsect'] == 'stop')):
            #end area at intersection
            df1.at[index,'label'] =  prev_label
            prev_label = df1.at[index,'label']
        else:
            #start a new area from intersection
            df1.at[index,'label'] =  0 if prev_label == 1 else 1
            prev_label = df1.at[index,'label']
            
    
    
    #the result in df1 is datapoints with labels and a grouping number,
    #each grouping number defines an area.
       
    df1['group'] = df1['label'].ne(df1['label'].shift()).cumsum()
    
    

    # following lines create a dataframe in dfs per group
    df1 = df1.groupby('group')

    dfs = []
    
    for name, data in df1:
        dfs.append(data)
        
    
    
    # # custom function to set fill color
    def fillcol(label):
        if label >= 1:
            return 'rgba(255,0,0,1)'
        else:
            return 'rgba(0,142,150,1)'
    
    fig = go.Figure()
     
    #Creating the coloured areas, the line color is black, opacity
    #0 meaning not visible. Each df in the dfs holds the datapoints for 1 green or 
    #red area. The more intersections, the more areas to define in the
    #dfs, hence more df's to draw.
    
    for df in dfs:
        fig.add_traces(go.Scatter(x=df.index, y = df.revenue,
                                  line = dict(color='rgba(0,0,0,0)')))
    
        fig.add_traces(go.Scatter(x=df.index, y = df.revenue_ly,
                                  line = dict(color='rgba(0,0,0,0)'),
                                  fill='tonexty', 
                                  fillcolor = fillcol(df['label'].iloc[0])))
    
    
    
    #printing the lines with the original datapoints
    fig.add_traces(go.Scatter(x=dff_org.index,  y = dff_org.revenue, mode="lines", 
                          line = dict(color = 'rgba(64,64,64,1)', width=2),
                          fill='tozeroy', fillcolor = 'rgba(166,166,166,.2)'
                          ))
    
    # 'revenue_lastyear
    fig.add_traces(go.Scatter(x=dff_org.index, y = dff_org.revenue_ly,mode="lines",
                          line = dict(color = 'rgba(166,166,166,1)', width=1)))
    
    fig.update_traces(hovertemplate = "%{x}: %{y:,.3s}<extra></extra>")
    
    
    #START DOING NORMAL THINGS YOU DO WITH A GO.FIGURE OBJECT.
    
    fig.update_layout(showlegend=False, height=275)
    
    
    
    #add min max, start and last monthly sales value
    fig.add_scatter(x=[xmax, xmin, xstart, xend],
                y=[ymax, ymin, ystart, yend], 
                text=[ymax, ymin, ystart, yend],
                texttemplate = "%{y:.3s}",
                textposition=['top center','bottom center','top center','top center'],
                mode='markers + text', marker=dict(color='Black', size=[10,10,10,14]))    
    
    
    
    fig.add_hline( y=0, line_dash="solid", line_width=1, opacity=1, line_color="Black")
    
    
    
    # Adding labels of AC and PY at start of line (left)
    # We know xy start for AC, have to query for PY
    
    annotations = []
    
    
       
    # # labeling the left_side of the plot with AC
    annotations.append(dict(xref='paper', x=0.05, y=ystart,
                                  xanchor='right', yanchor='middle',
                                  text='AC',
                                  font=dict(family='Arial',
                                            size=16),
                                  showarrow=False))
    
    # # labeling the left_side of the plot with PY (first value revenue_ly)
    annotations.append(dict(xref='paper', x=0.05, y=dff['revenue_ly'].iloc[0],
                                  xanchor='right', yanchor='middle',
                                  text='PY',
                                  font=dict(family='Arial',
                                            size=16),
                                  showarrow=False))
    
    
    
    #reset y axis to have the same scale for all employees thus making comparison easier and
    #scale it larger than max and min to make sure all values will end up in the visual
    #how exactly is a bit of trial and error.
    fig.update_yaxes(title='', visible=False, showticklabels=False,range=[-3500, 1.2 * y_overall_max])
    fig.update_xaxes(title='', visible=False, showticklabels=False)
    
    fig.update_layout(
            margin=dict(l=15, r=15, t=45, b=25),
            plot_bgcolor='white',
            annotations=annotations,
            showlegend=False,
            title = {
                 'text': f"{employee_name}",
                 'font': dict(color='black',
                              weight = 500,
                              family = 'Roboto'),

                 
                },
            
            
    
            )
        
    card = dbc.Card([ 
                   dcc.Graph(figure = fig)
                    ], className='col-md-4', style={})
    
    
    
    
    return card
    
    



def create_cardgrid_employees(df):
    
    employees_ids = df['employeeID'].unique()
    card_list = []
    
    for e in employees_ids:
        #this is not completely correct since an employee could have left
        data =df.loc[df['employeeID'] == e].tail(12).reset_index(drop=True)
        
        card_list.append(create_line_chart_employee(data))
    
    
    return card_list





app = dash.Dash(__name__, external_stylesheets=[dbc.themes.SANDSTONE,dbc_css]) 

app.layout = dbc.Container([ 
               dbc.Row([
                   dbc.Col([
                       html.P('Northwind Traders'),
                       html.P('AC and LY revenue by employee in month K$'),
                       html.P(id='dynamic_title')
                       
                       ], style={'fontWeight': 'bold'}),

                   dbc.Col([
                       html.P(''),
                       ], className='col-md-4 col-sm-12'),
                                  

                   dbc.Col([
                       html.H2('IBCS Experiment no.3'),
                       ], className='col-md-4 col-sm-12')
                   ], style={'marginBottom': '2rem'}),
               
               dbc.Row([ 
                   dbc.Col([html.Div(create_cardgrid_employees(df), style={'display':'flex', 'flexWrap':'wrap', 'marginBottom': '2rem'})], className='col-md-12 col-sm-12'),
                   
                   ], className='col-md-12'),
               ],style={'marginTop': '3rem'},  fluid=False)






app.run_server(debug=True)