Py.Cafe

Feanor1992/

Grocery ingredients

Dash Interactive Color Example — This code builds a robust, interactive dashboard for exploring grocery food data.

DocsPricing
  • GroceryDB_foods.csv
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output, State
import dash_ag_grid as dag
import plotly.graph_objs as go

# Load the grocery data set; the path is resolved relative to the working directory.
df = pd.read_csv('GroceryDB_foods.csv')

# Give the columns used below identifier-friendly names.
# Labels missing from the CSV are silently ignored by ``rename``.
df.rename(columns={
    'price percal': 'price_per_cal',
    'Total Fat': 'total_fat',
    'Carbohydrate': 'carbohydrate',
    'Sugars, total': 'total_sugars',
    'Fiber, total dietary': 'total_fiber',
    'Fatty acids, total saturated': 'total_saturated_fat',
    'Total Vitamin A': 'total_vitamin_A'
}, inplace=True)

# Define vitamins/minerals that should have missing values replaced with 0
vitamins_minerals = ['Vitamin C', 'total_vitamin_A', 'Calcium', 'Iron']
for col in vitamins_minerals:
    if col in df.columns:
        df[col] = df[col].fillna(0)
    else:
        print(f"Warning: Column {col} not found in the dataset.")

# Fill missing values for the remaining numeric columns with the median.
# Negative values are repaired in the critical columns (price and
# package_weight) and only reported for every other column.
critical_cols = ('price', 'package_weight')
for col in df.select_dtypes(include=[np.number]).columns:
    if col in vitamins_minerals:
        continue
    negative_mask = df[col] < 0
    if negative_mask.any():
        if col in critical_cols:
            # Take the median of the valid (non-negative) entries only, so the
            # bad values being replaced do not drag the replacement down.
            median_val = df.loc[~negative_mask, col].median()
            df.loc[negative_mask, col] = median_val
            print(f"Corrected negative values in column {col} to median value {median_val}.")
        else:
            print(f"Warning: Negative values detected in column {col}.")
    df[col] = df[col].fillna(df[col].median())

# Fill missing values for categorical columns with "Unknown"
for col in df.select_dtypes(include=['object']).columns:
    df[col] = df[col].fillna('Unknown')

# Create a new feature: price per weight.
# NOTE(review): the original comment assumes package_weight is in grams - confirm.
# A zero weight is mapped to NaN first so the ratio is missing instead of +/-inf.
df['price_per_weight'] = df['price'] / df['package_weight'].replace(0, np.nan)

# Define nutrient groups for nutrient density calculation.
# Beneficial nutrients (the higher, the better):
beneficial = ['Protein', 'total_fiber', 'Vitamin C', 'total_vitamin_A', 'Calcium', 'Iron']
# Negative nutrients (excess amounts reduce nutritional quality):
negative = ['total_fat', 'total_saturated_fat', 'total_sugars', 'Sodium', 'Cholesterol']

# Dash application object; ``server`` exposes the underlying server object
# under a module-level name.
app = Dash(__name__)
server = app.server

# Sorted distinct values backing the category and brand dropdowns.
categories = list(df['harmonized single category'].unique())
categories.sort()
brands = list(df['brand'].unique())
brands.sort()

# Each dropdown starts with an "All" entry, followed by one entry per value.
category_options = [{'label': 'All', 'value': 'All'}]
category_options.extend({'label': cat, 'value': cat} for cat in categories)
brand_options = [{'label': 'All', 'value': 'All'}]
brand_options.extend({'label': brand, 'value': brand} for brand in brands)

# Nutrient columns offered in the radar chart and histogram selectors
# (names as they are after the column renaming).
nutrient_options = [
    {'label': nutrient, 'value': nutrient}
    for nutrient in (
        'Protein', 'total_fat', 'total_fiber', 'total_saturated_fat', 'total_sugars',
        'Vitamin C', 'total_vitamin_A', 'Calcium', 'Iron',
    )
]

# Quantities selectable as scatter plot axes: (label shown, column name).
axis_options = [
    {'label': label, 'value': column}
    for label, column in (
        ('Price', 'price'),
        ('Price per Calorie', 'price_per_cal'),
        ('Nutrient Density', 'nutrient_density'),
        ('Price per Weight', 'price_per_weight'),
    )
]

# The scatter plot colour selector offers the very same choices (same list object).
color_options = axis_options

# Reference daily amounts used by the "percentage" radar metric.
# These are sample estimates and can be adjusted.
recommended_values = {
    # grams
    'Protein': 50,
    'total_fat': 70,
    'total_fiber': 28,
    'total_saturated_fat': 20,
    'total_sugars': 90,
    # mg
    'Vitamin C': 90,
    # mcg
    'total_vitamin_A': 900,
    # mg
    'Calcium': 1300,
    'Iron': 18,
}

# Layout of the app with dark theme and additional controls for radar chart metric selection.
# Style dictionaries use camelCase keys throughout (marginRight, not margin-right),
# matching the textAlign/backgroundColor keys already used here.
# Single-value dropdowns are not clearable, so the callbacks never receive None
# for a column name.
app.layout = html.Div(style={'backgroundColor': '#1a1a1a', 'color': 'white', 'padding': '10px'}, children=[
    html.H1('Enhanced Grocery Ingredients Dashboard', style={'textAlign': 'center'}),
    # Dropdown for category selection
    html.Div([
        html.Label('Select Category:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='category-dropdown',
            options=category_options,
            value=['All'],
            multi=True,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'display': 'inline-block', 'padding': '10px'}),
    # Dropdown for brand selection
    html.Div([
        html.Label('Select Brand:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='brand-dropdown',
            options=brand_options,
            value=['All'],
            multi=True,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'display': 'inline-block', 'padding': '10px'}),
    # Dropdown for selecting nutrients for the radar chart
    html.Div([
        html.Label('Select Nutrients for Radar Chart:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='nutrient-dropdown',
            options=nutrient_options,
            value=['Protein', 'total_fat'],
            multi=True,
            style={'color': 'black', 'width': '500px'}
        )
    ], style={'padding': '10px'}),
    # Dropdowns to select X and Y axes for the scatter plot
    html.Div([
        html.Label('Select X-axis for Scatter Plot:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='scatter-x-dropdown',
            options=axis_options,
            value='price',
            clearable=False,
            style={'color': 'black', 'width': '250px'}
        ),
        html.Label('Select Y-axis for Scatter Plot:', style={'marginLeft': '10px', 'marginRight': '10px'}),
        dcc.Dropdown(
            id='scatter-y-dropdown',
            options=axis_options,
            value='nutrient_density',
            clearable=False,
            style={'color': 'black', 'width': '250px'}
        )
    ], style={'padding': '10px'}),
    # Dropdown for selecting the color variable for the scatter plot
    html.Div([
        html.Label('Select Color Variable for Scatter Plot:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='scatter-color-dropdown',
            options=color_options,
            value='price_per_weight',
            clearable=False,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'padding': '10px'}),
    # Dropdown to select the nutrient for the histogram
    html.Div([
        html.Label('Select Nutrient for Histogram:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='histogram-nutrient-dropdown',
            options=nutrient_options,
            value='Protein',
            multi=False,
            clearable=False,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'padding': '10px'}),
    # Input fields for setting custom weight factors for nutrient density calculation
    html.Div([
        html.Label('Set Beneficial Weight Factor (default=1):', style={'marginRight': '10px'}),
        dcc.Input(
            id='beneficial-weight-input',
            type='number',
            value=1,
            min=0,
            step=0.1,
            style={'color': 'black', 'width': '100px'}
        ),
        html.Label('Set Negative Weight Factor (default=1):', style={'marginLeft': '20px', 'marginRight': '10px'}),
        dcc.Input(
            id='negative-weight-input',
            type='number',
            value=1,
            min=0,
            step=0.1,
            style={'color': 'black', 'width': '100px'}
        ),
    ], style={'padding': '10px'}),
    # Dropdown to choose the radar chart normalization method
    html.Div([
        html.Label('Select Radar Chart Metric:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='radar-metric-dropdown',
            options=[
                {'label': 'Z-Score', 'value': 'zscore'},
                {'label': 'Absolute', 'value': 'absolute'},
                {'label': 'Percentage of Daily Value', 'value': 'percentage'}
            ],
            value='zscore',
            multi=False,
            clearable=False,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'padding': '10px'}),
    # Graphs: Radar Chart, Scatter Plot, Bar Chart, and Histogram
    html.Div([
        dcc.Graph(id='radar-chart'),
    ], style={'padding': '20px'}),
    html.Div([
        dcc.Graph(id='scatter-plot'),
    ], style={'padding': '20px'}),
    html.Div([
        dcc.Graph(id='bar-chart'),
    ], style={'padding': '20px'}),
    html.Div([
        dcc.Graph(id='histogram-chart'),
    ], style={'padding': '20px'}),
    # Data Grid: Display raw data (one filterable, sortable column per DataFrame column)
    html.H2('Data Table', style={'textAlign': 'center'}),
    dag.AgGrid(
        id='data-grid',
        rowData=df.to_dict('records'),
        columnDefs=[{"field": i, 'filter': True, 'sortable': True} for i in df.columns],
        dashGridOptions={'pagination': True},
        columnSize='sizeToFit'
    ),
])

# Callback to update the brand dropdown options based on selected categories.
@app.callback(
    [Output('brand-dropdown', 'options'),
     Output('brand-dropdown', 'value')],
    [Input('category-dropdown', 'value')]
)
def update_brand_options(selected_categories):
    """Rebuild the brand dropdown for the currently selected categories.

    Args:
        selected_categories: Value of the category dropdown - a list of
            category names, possibly containing 'All'. An empty list or
            ``None`` (nothing selected) is treated like 'All'.

    Returns:
        A ``(options, value)`` pair: the option dicts for the brand dropdown
        ('All' first, then the matching brands in sorted order) and the new
        selection, which is always reset to ``['All']``.
    """
    # Test for "nothing selected" first: ``'All' in None`` would raise TypeError.
    if not selected_categories or 'All' in selected_categories:
        filtered_brands = brands
    else:
        in_selected = df['harmonized single category'].isin(selected_categories)
        filtered_brands = sorted(df.loc[in_selected, 'brand'].unique())
    options = [{'label': 'All', 'value': 'All'}] + [{'label': brand, 'value': brand} for brand in filtered_brands]
    return options, ['All']

def _zscore(values):
    """Return the z-scores of *values* as a Series on the same index.

    A zero or undefined (NaN) standard deviation - constant data, a single
    observation, or no data at all - yields all zeros rather than NaN or a
    bare scalar.
    """
    spread = values.std()
    if pd.isna(spread) or spread == 0:
        return pd.Series(0.0, index=values.index)
    return (values - values.mean()) / spread


@app.callback(
    [Output('data-grid', 'rowData'),
     Output('radar-chart', 'figure'),
     Output('scatter-plot', 'figure'),
     Output('bar-chart', 'figure'),
     Output('histogram-chart', 'figure')],
    [Input('category-dropdown', 'value'),
     Input('brand-dropdown', 'value'),
     Input('nutrient-dropdown', 'value'),
     Input('scatter-x-dropdown', 'value'),
     Input('scatter-y-dropdown', 'value'),
     Input('scatter-color-dropdown', 'value'),
     Input('histogram-nutrient-dropdown', 'value'),
     Input('beneficial-weight-input', 'value'),
     Input('negative-weight-input', 'value'),
     Input('radar-metric-dropdown', 'value')]
)
def update_dashboard(selected_categories, selected_brands, selected_nutrients, scatter_x, scatter_y,
                     scatter_color, hist_nutrient, beneficial_weight, negative_weight, radar_metric):
    """Recompute the grid rows and all four figures from the current controls.

    Args:
        selected_categories: Chosen categories; empty, ``None`` or containing
            'All' means every category.
        selected_brands: Chosen brands; same conventions as the categories.
        selected_nutrients: Nutrient columns drawn on the radar chart. Empty
            or ``None`` gives a radar chart without traces.
        scatter_x: Column plotted on the scatter x-axis.
        scatter_y: Column plotted on the scatter y-axis.
        scatter_color: Column used to colour the scatter markers.
        hist_nutrient: Column shown in the histogram.
        beneficial_weight: Factor applied to the beneficial z-scores.
        negative_weight: Factor applied to the negative z-scores.
        radar_metric: 'zscore', 'percentage', or anything else for raw
            (absolute) category means.

    A cleared (``None``) column selector or weight falls back to the initial
    value used in the layout.

    Returns:
        A 5-tuple: the filtered rows as a list of record dicts, then the
        radar, scatter, bar and histogram figures.
    """
    # Test for "nothing selected" first: ``'All' in None`` would raise TypeError.
    # A non-empty selection without 'All' is used as is.
    if not selected_categories or 'All' in selected_categories:
        selected_categories = categories
    if not selected_brands or 'All' in selected_brands:
        selected_brands = brands
    selected_nutrients = selected_nutrients or []

    # Cleared controls arrive as None; fall back to the layout defaults.
    scatter_x = scatter_x or 'price'
    scatter_y = scatter_y or 'nutrient_density'
    scatter_color = scatter_color or 'price_per_weight'
    hist_nutrient = hist_nutrient or 'Protein'
    if beneficial_weight is None:
        beneficial_weight = 1
    if negative_weight is None:
        negative_weight = 1

    # Filter data based on selected categories and brands.
    dff = df[(df['harmonized single category'].isin(selected_categories)) &
             (df['brand'].isin(selected_brands))].copy()

    # Z-scores of every beneficial/negative nutrient present in the data,
    # computed on the filtered subset. The frame shares dff's index so each
    # column lines up row by row.
    z_sub = pd.DataFrame(index=dff.index)
    for nutrient in beneficial + negative:
        if nutrient in dff.columns:
            z_sub[nutrient] = _zscore(dff[nutrient])

    # Keep only the nutrients that exist in the subset for each group
    beneficial_cols = [nutrient for nutrient in beneficial if nutrient in z_sub.columns]
    negative_cols = [nutrient for nutrient in negative if nutrient in z_sub.columns]

    # Compute nutrient_density as sum(weighted beneficial z-scores) minus sum(weighted negative z-scores)
    if not z_sub.empty:
        density_beneficial = z_sub[beneficial_cols].mul(beneficial_weight).sum(axis=1)
        density_negative = z_sub[negative_cols].mul(negative_weight).sum(axis=1)
        dff['nutrient_density'] = density_beneficial - density_negative
    else:
        dff['nutrient_density'] = 0

    # Radar Chart: one trace per selected nutrient, one spoke per category.
    # - "zscore": normalize each nutrient's category means via z-score
    # - "percentage": express the means as a percentage of the recommended daily value
    # - anything else: use the raw category means
    radar_fig = go.Figure()
    if selected_nutrients:
        df_radar = dff.groupby('harmonized single category')[selected_nutrients].mean().reset_index()
        if radar_metric == 'zscore':
            for nutrient in selected_nutrients:
                df_radar[nutrient] = _zscore(df_radar[nutrient])
        elif radar_metric == 'percentage':
            for nutrient in selected_nutrients:
                # Nutrients without a listed daily value are scaled against 100.
                rec_value = recommended_values.get(nutrient, 100)
                df_radar[nutrient] = (df_radar[nutrient] / rec_value) * 100
        for nutrient in selected_nutrients:
            radar_fig.add_trace(go.Scatterpolar(
                r=df_radar[nutrient],
                theta=df_radar['harmonized single category'],
                fill='toself',
                name=nutrient
            ))
    # Set layout for the radar chart with dark theme
    metric_label = {'zscore': 'Z-Score', 'percentage': 'Percentage'}.get(radar_metric, 'Absolute')
    radar_fig.update_layout(
        title=f"Average Nutrients by Category ({metric_label})",
        polar=dict(
            radialaxis=dict(
                visible=True,
                color='white'
            )
        ),
        template='plotly_dark',
        font=dict(color='white')
    )

    # Scatter Plot: Dynamic axes and color coding based on user selections
    scatter_fig = go.Figure(data=go.Scatter(
        x=dff[scatter_x],
        y=dff[scatter_y],
        mode='markers',
        marker=dict(
            size=10,
            color=dff[scatter_color],
            colorscale='Viridis',
            showscale=True
        ),
        text=dff['name']
    ))
    scatter_fig.update_layout(
        title=f"Scatter Plot: {scatter_x} vs {scatter_y} (Color: {scatter_color})",
        xaxis_title=scatter_x,
        yaxis_title=scatter_y,
        template='plotly_dark',
        font=dict(color='white')
    )

    # Bar Chart: Compare average price per calorie by category
    df_bar = dff.groupby('harmonized single category')['price_per_cal'].mean().reset_index()
    bar_fig = go.Figure(data=go.Bar(
        x=df_bar['harmonized single category'],
        y=df_bar['price_per_cal'],
        marker_color='lightsalmon'
    ))
    bar_fig.update_layout(
        title='Average Price per Calorie by Category',
        xaxis_title='Category',
        yaxis_title='Price per Calorie',
        template='plotly_dark',
        font=dict(color='white')
    )

    # Histogram: Distribution of selected nutrient
    hist_fig = go.Figure(data=go.Histogram(
        x=dff[hist_nutrient],
        nbinsx=20,
        marker_color='lightskyblue'
    ))
    hist_fig.update_layout(
        title=f"Distribution of {hist_nutrient}",
        xaxis_title=hist_nutrient,
        yaxis_title='Count',
        template='plotly_dark',
        font=dict(color='white')
    )

    return dff.to_dict("records"), radar_fig, scatter_fig, bar_fig, hist_fig

# Start the app in debug mode only when this file is executed as a script.
if __name__ == '__main__':
    app.run(debug=True)