import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output, State
import dash_ag_grid as dag
import plotly.graph_objs as go
# Load the grocery table and give the nutrient columns the snake_case names
# that the rest of this module refers to.
df = pd.read_csv('GroceryDB_foods.csv')
df = df.rename(columns={
    'price percal': 'price_per_cal',
    'Total Fat': 'total_fat',
    'Carbohydrate': 'carbohydrate',
    'Sugars, total': 'total_sugars',
    'Fiber, total dietary': 'total_fiber',
    'Fatty acids, total saturated': 'total_saturated_fat',
    'Total Vitamin A': 'total_vitamin_A',
})

# Vitamins/minerals: a missing value is replaced with 0.
vitamins_minerals = ['Vitamin C', 'total_vitamin_A', 'Calcium', 'Iron']
for col in vitamins_minerals:
    if col in df.columns:
        df[col] = df[col].fillna(0)
    else:
        print(f"Warning: Column {col} not found in the dataset.")

# Every other numeric column: missing values are filled with the median.
# Negative values are only repaired for the critical columns (price and
# package_weight); for any other column they are just reported.
for col in df.select_dtypes(include=[np.number]).columns:
    if col in vitamins_minerals:
        continue
    negative_mask = df[col] < 0
    if negative_mask.any():
        if col in ['price', 'package_weight']:
            # Take the median of the valid (non-negative) entries only, so the
            # replacement value is not dragged down by the bad rows themselves.
            median_val = df.loc[~negative_mask, col].median()
            df.loc[negative_mask, col] = median_val
            print(f"Corrected negative values in column {col} to median value {median_val}.")
        else:
            print(f"Warning: Negative values detected in column {col}.")
    df[col] = df[col].fillna(df[col].median())

# Categorical (object) columns: missing values become the literal "Unknown".
for col in df.select_dtypes(include=['object']).columns:
    df[col] = df[col].fillna('Unknown')

# Derived feature: price per weight (assuming package_weight is in grams).
# A zero weight is mapped to NaN first so the ratio is NaN rather than inf,
# which would otherwise distort axis ranges and the colour scale.
df['price_per_weight'] = df['price'] / df['package_weight'].replace(0, np.nan)

# Nutrient groups for the nutrient density calculation.
# Beneficial nutrients (the higher, the better):
beneficial = ['Protein', 'total_fiber', 'Vitamin C', 'total_vitamin_A', 'Calcium', 'Iron']
# Negative nutrients (excess amounts reduce nutritional quality):
negative = ['total_fat', 'total_saturated_fat', 'total_sugars', 'Sodium', 'Cholesterol']
# Dash application object, plus a module-level handle on its underlying server.
app = Dash(__name__)
server = app.server

# Sorted distinct values that feed the category and brand dropdowns.
categories = sorted(df['harmonized single category'].unique())
brands = sorted(df['brand'].unique())

# Each dropdown starts with an "All" entry followed by one entry per value.
category_options = [{'label': 'All', 'value': 'All'}]
category_options.extend({'label': cat, 'value': cat} for cat in categories)
brand_options = [{'label': 'All', 'value': 'All'}]
brand_options.extend({'label': brand, 'value': brand} for brand in brands)

# Nutrients offered in the radar chart and histogram selectors
# (label and value are both the column name as it appears in `df`).
nutrient_options = [
    {'label': nutrient_name, 'value': nutrient_name}
    for nutrient_name in (
        'Protein', 'total_fat', 'total_fiber', 'total_saturated_fat', 'total_sugars',
        'Vitamin C', 'total_vitamin_A', 'Calcium', 'Iron',
    )
]

# Quantities selectable as scatter plot axes.
axis_options = [
    {'label': label, 'value': value}
    for label, value in (
        ('Price', 'price'),
        ('Price per Calorie', 'price_per_cal'),
        ('Nutrient Density', 'nutrient_density'),
        ('Price per Weight', 'price_per_weight'),
    )
]

# The colour selector offers exactly the axis choices (same list object).
color_options = axis_options

# Recommended daily amounts used for the "percentage of daily value" metric.
# These are sample estimates and can be adjusted.
recommended_values = {
    'Protein': 50,              # grams
    'total_fat': 70,            # grams
    'total_fiber': 28,          # grams
    'total_saturated_fat': 20,  # grams
    'total_sugars': 90,         # grams
    'Vitamin C': 90,            # mg
    'total_vitamin_A': 900,     # mcg
    'Calcium': 1300,            # mg
    'Iron': 18,                 # mg
}
# Layout of the app with dark theme and additional controls for radar chart metric selection.
# Style dictionaries use camelCased keys throughout (marginRight rather than
# margin-right), which is the form Dash documents for the `style` property.
app.layout = html.Div(style={'backgroundColor': '#1a1a1a', 'color': 'white', 'padding': '10px'}, children=[
    html.H1('Enhanced Grocery Ingredients Dashboard', style={'textAlign': 'center'}),
    # Dropdown for category selection
    html.Div([
        html.Label('Select Category:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='category-dropdown',
            options=category_options,
            value=['All'],
            multi=True,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'display': 'inline-block', 'padding': '10px'}),
    # Dropdown for brand selection
    html.Div([
        html.Label('Select Brand:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='brand-dropdown',
            options=brand_options,
            value=['All'],
            multi=True,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'display': 'inline-block', 'padding': '10px'}),
    # Dropdown for selecting nutrients for the radar chart
    html.Div([
        html.Label('Select Nutrients for Radar Chart:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='nutrient-dropdown',
            options=nutrient_options,
            value=['Protein', 'total_fat'],
            multi=True,
            style={'color': 'black', 'width': '500px'}
        )
    ], style={'padding': '10px'}),
    # Dropdowns to select X and Y axes for the scatter plot
    html.Div([
        html.Label('Select X-axis for Scatter Plot:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='scatter-x-dropdown',
            options=axis_options,
            value='price',
            style={'color': 'black', 'width': '250px'}
        ),
        html.Label('Select Y-axis for Scatter Plot:', style={'marginLeft': '10px', 'marginRight': '10px'}),
        dcc.Dropdown(
            id='scatter-y-dropdown',
            options=axis_options,
            value='nutrient_density',
            style={'color': 'black', 'width': '250px'}
        )
    ], style={'padding': '10px'}),
    # Dropdown for selecting the color variable for the scatter plot
    html.Div([
        html.Label('Select Color Variable for Scatter Plot:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='scatter-color-dropdown',
            options=color_options,
            value='price_per_weight',
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'padding': '10px'}),
    # Dropdown to select the nutrient for the histogram
    html.Div([
        html.Label('Select Nutrient for Histogram:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='histogram-nutrient-dropdown',
            options=nutrient_options,
            value='Protein',
            multi=False,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'padding': '10px'}),
    # Input fields for setting custom weight factors for nutrient density calculation
    html.Div([
        html.Label('Set Beneficial Weight Factor (default=1):', style={'marginRight': '10px'}),
        dcc.Input(
            id='beneficial-weight-input',
            type='number',
            value=1,
            min=0,
            step=0.1,
            style={'color': 'black', 'width': '100px'}
        ),
        html.Label('Set Negative Weight Factor (default=1):', style={'marginLeft': '20px', 'marginRight': '10px'}),
        dcc.Input(
            id='negative-weight-input',
            type='number',
            value=1,
            min=0,
            step=0.1,
            style={'color': 'black', 'width': '100px'}
        ),
    ], style={'padding': '10px'}),
    # Dropdown to choose the radar chart normalization method
    html.Div([
        html.Label('Select Radar Chart Metric:', style={'marginRight': '10px'}),
        dcc.Dropdown(
            id='radar-metric-dropdown',
            options=[
                {'label': 'Z-Score', 'value': 'zscore'},
                {'label': 'Absolute', 'value': 'absolute'},
                {'label': 'Percentage of Daily Value', 'value': 'percentage'}
            ],
            value='zscore',
            multi=False,
            style={'color': 'black', 'width': '300px'}
        )
    ], style={'padding': '10px'}),
    # Graphs: Radar Chart, Scatter Plot, Bar Chart, and Histogram
    html.Div([
        dcc.Graph(id='radar-chart'),
    ], style={'padding': '20px'}),
    html.Div([
        dcc.Graph(id='scatter-plot'),
    ], style={'padding': '20px'}),
    html.Div([
        dcc.Graph(id='bar-chart'),
    ], style={'padding': '20px'}),
    html.Div([
        dcc.Graph(id='histogram-chart'),
    ], style={'padding': '20px'}),
    # Data Grid: Display raw data
    html.H2('Data Table', style={'textAlign': 'center'}),
    dag.AgGrid(
        id='data-grid',
        rowData=df.to_dict('records'),
        columnDefs=[{"field": i, 'filter': True, 'sortable': True} for i in df.columns],
        dashGridOptions={'pagination': True},
        columnSize='sizeToFit'
    ),
])
# Callback to update the brand dropdown options based on selected categories.
@app.callback(
    [Output('brand-dropdown', 'options'),
     Output('brand-dropdown', 'value')],
    [Input('category-dropdown', 'value')]
)
def update_brand_options(selected_categories):
    """Rebuild the brand dropdown for the currently selected categories.

    Args:
        selected_categories: Value of the category dropdown; a list of
            category names that may contain the special entry ``'All'``,
            or an empty/``None`` value when nothing is selected.

    Returns:
        A ``(options, value)`` pair: the option dicts for the brand dropdown
        (always starting with ``'All'``) and the new selection, which is
        reset to ``['All']`` on every category change.
    """
    # Emptiness is tested first: `'All' in None` would raise TypeError if the
    # dropdown ever delivers None instead of an empty list.
    if not selected_categories or 'All' in selected_categories:
        filtered_brands = brands
    else:
        in_selected = df['harmonized single category'].isin(selected_categories)
        filtered_brands = sorted(df.loc[in_selected, 'brand'].unique())
    options = [{'label': 'All', 'value': 'All'}]
    options.extend({'label': brand, 'value': brand} for brand in filtered_brands)
    return options, ['All']
def _subset_zscore(values):
    """Return the z-scores of *values* as a Series aligned with its index.

    When the standard deviation is zero or undefined (NaN, e.g. fewer than two
    observations) every score is 0.0, instead of propagating NaN or dividing
    by zero.
    """
    spread = values.std()
    if pd.isna(spread) or spread == 0:
        return pd.Series(0.0, index=values.index)
    return (values - values.mean()) / spread


def _build_radar_figure(dff, selected_nutrients, radar_metric):
    """Build the radar chart of per-category nutrient means.

    Args:
        dff: Filtered data; must contain 'harmonized single category' and
            every column named in *selected_nutrients*.
        selected_nutrients: Column names to plot, one trace each. May be empty,
            in which case the figure has no traces.
        radar_metric: 'zscore' (z-score across the category means),
            'percentage' (percent of ``recommended_values``; nutrients without
            an entry are divided by 100), anything else plots the raw means.
    """
    radar_fig = go.Figure()
    if selected_nutrients:
        # Mean of each selected nutrient per category.
        df_radar = dff.groupby('harmonized single category')[selected_nutrients].mean().reset_index()
        if radar_metric == 'zscore':
            for nutrient in selected_nutrients:
                df_radar[nutrient] = _subset_zscore(df_radar[nutrient])
        elif radar_metric == 'percentage':
            for nutrient in selected_nutrients:
                rec_value = recommended_values.get(nutrient, 100)
                df_radar[nutrient] = (df_radar[nutrient] / rec_value) * 100
        # One polar trace per selected nutrient.
        for nutrient in selected_nutrients:
            radar_fig.add_trace(go.Scatterpolar(
                r=df_radar[nutrient],
                theta=df_radar['harmonized single category'],
                fill='toself',
                name=nutrient
            ))
    metric_label = {'zscore': 'Z-Score', 'percentage': 'Percentage'}.get(radar_metric, 'Absolute')
    radar_fig.update_layout(
        title=f"Average Nutrients by Category ({metric_label})",
        polar=dict(
            radialaxis=dict(
                visible=True,
                color='white'
            )
        ),
        template='plotly_dark',
        font=dict(color='white')
    )
    return radar_fig


@app.callback(
    [Output('data-grid', 'rowData'),
     Output('radar-chart', 'figure'),
     Output('scatter-plot', 'figure'),
     Output('bar-chart', 'figure'),
     Output('histogram-chart', 'figure')],
    [Input('category-dropdown', 'value'),
     Input('brand-dropdown', 'value'),
     Input('nutrient-dropdown', 'value'),
     Input('scatter-x-dropdown', 'value'),
     Input('scatter-y-dropdown', 'value'),
     Input('scatter-color-dropdown', 'value'),
     Input('histogram-nutrient-dropdown', 'value'),
     Input('beneficial-weight-input', 'value'),
     Input('negative-weight-input', 'value'),
     Input('radar-metric-dropdown', 'value')]
)
def update_dashboard(selected_categories, selected_brands, selected_nutrients, scatter_x, scatter_y,
                     scatter_color, hist_nutrient, beneficial_weight, negative_weight, radar_metric):
    """Recompute the grid rows and all four figures from the current controls.

    Args:
        selected_categories: Chosen categories; empty/None or containing
            'All' means every category.
        selected_brands: Chosen brands; empty/None or containing 'All' means
            every brand.
        selected_nutrients: Nutrient columns for the radar chart.
        scatter_x: Column plotted on the scatter x-axis.
        scatter_y: Column plotted on the scatter y-axis.
        scatter_color: Column that drives the scatter marker colour.
        hist_nutrient: Column shown in the histogram.
        beneficial_weight: Multiplier applied to beneficial-nutrient z-scores.
        negative_weight: Multiplier applied to negative-nutrient z-scores.
        radar_metric: 'zscore', 'absolute' or 'percentage'.

    Returns:
        ``(rows, radar_fig, scatter_fig, bar_fig, hist_fig)`` where ``rows`` is
        the filtered data (including the computed 'nutrient_density' column)
        as a list of record dicts.
    """
    # Emptiness is tested before membership so a None value cannot raise
    # TypeError in the `in` check.
    if not selected_categories or 'All' in selected_categories:
        selected_categories = categories
    if not selected_brands or 'All' in selected_brands:
        selected_brands = brands

    # A cleared control delivers None; fall back to the same defaults the
    # layout starts with so the column lookups below cannot fail on None.
    selected_nutrients = selected_nutrients or []
    scatter_x = scatter_x or 'price'
    scatter_y = scatter_y or 'nutrient_density'
    scatter_color = scatter_color or 'price_per_weight'
    hist_nutrient = hist_nutrient or 'Protein'
    if beneficial_weight is None:
        beneficial_weight = 1
    if negative_weight is None:
        negative_weight = 1

    # Filter data based on selected categories and brands.
    row_mask = (df['harmonized single category'].isin(selected_categories)
                & df['brand'].isin(selected_brands))
    dff = df[row_mask].copy()

    # Z-scores are computed on the filtered subset only. The frame is created
    # with dff's index so every column stays row-aligned from the start.
    z_sub = pd.DataFrame(index=dff.index)
    for nutrient in beneficial + negative:
        if nutrient in dff.columns:
            z_sub[nutrient] = _subset_zscore(dff[nutrient])
    beneficial_cols = [nutrient for nutrient in beneficial if nutrient in z_sub.columns]
    negative_cols = [nutrient for nutrient in negative if nutrient in z_sub.columns]

    # nutrient_density = sum(weighted beneficial z) - sum(weighted negative z)
    if not z_sub.empty:
        density_beneficial = z_sub[beneficial_cols].mul(beneficial_weight).sum(axis=1)
        density_negative = z_sub[negative_cols].mul(negative_weight).sum(axis=1)
        dff['nutrient_density'] = density_beneficial - density_negative
    else:
        dff['nutrient_density'] = 0

    dark_theme = dict(template='plotly_dark', font=dict(color='white'))

    radar_fig = _build_radar_figure(dff, selected_nutrients, radar_metric)

    # Scatter Plot: dynamic axes and colour coding based on user selections.
    scatter_fig = go.Figure(data=go.Scatter(
        x=dff[scatter_x],
        y=dff[scatter_y],
        mode='markers',
        marker=dict(
            size=10,
            color=dff[scatter_color],
            colorscale='Viridis',
            showscale=True
        ),
        text=dff['name']
    ))
    scatter_fig.update_layout(
        title=f"Scatter Plot: {scatter_x} vs {scatter_y} (Color: {scatter_color})",
        xaxis_title=scatter_x,
        yaxis_title=scatter_y,
        **dark_theme
    )

    # Bar Chart: average price per calorie by category.
    df_bar = dff.groupby('harmonized single category')['price_per_cal'].mean().reset_index()
    bar_fig = go.Figure(data=go.Bar(
        x=df_bar['harmonized single category'],
        y=df_bar['price_per_cal'],
        marker_color='lightsalmon'
    ))
    bar_fig.update_layout(
        title='Average Price per Calorie by Category',
        xaxis_title='Category',
        yaxis_title='Price per Calorie',
        **dark_theme
    )

    # Histogram: distribution of the selected nutrient.
    hist_fig = go.Figure(data=go.Histogram(
        x=dff[hist_nutrient],
        nbinsx=20,
        marker_color='lightskyblue'
    ))
    hist_fig.update_layout(
        title=f"Distribution of {hist_nutrient}",
        xaxis_title=hist_nutrient,
        yaxis_title='Count',
        **dark_theme
    )

    return dff.to_dict("records"), radar_fig, scatter_fig, bar_fig, hist_fig
# Launch the app in debug mode, but only when this file is run as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    app.run(debug=True)