# -*- coding: utf-8 -*-
"""
Created on Fri Mar 28 21:06:16 2025
@author: win11
"""
from dash import Dash, dcc, callback, Output, Input, html
import pandas as pd
#import plotly.graph_objs as go
import plotly.express as px
import numpy as np
import dash_bootstrap_components as dbc
# Download CSV sheet at: https://drive.google.com/file/d/1EoFTpSJOIYmVzemoMLj7vMTqeM1zMy0o/view?usp=sharing
df = pd.read_csv('GroceryDB_foods.csv')

# About 50,000 data points is too much for this visual, so columns of no
# interest are dropped here; the remaining ones are grouped by category
# (mean value) further down.
columns_del = ['name', 'store', 'brand', 'price', 'price percal', 'package_weight', 'Iron', 'Vitamin C', 'Total Vitamin A']

# Keep only numeric columns, minus the ones listed in columns_del.
columns = [
    name
    for name in df.select_dtypes(include=['number']).columns.tolist()
    if name not in columns_del
]
# Convert the values in these columns with np.log (natural log) so they are more comparable; log10 was too flat.
def create_log_column(columns, df):
    """Add a natural-log version of each listed column to ``df`` in place.

    For every name in ``columns`` a new column ``<name>_log`` is written.
    Values greater than zero become ``ln(value)``; every other value
    (zero, negative or missing) becomes 0.

    Args:
        columns: Names of the numeric columns in ``df`` to transform.
        df: DataFrame to extend; it is modified in place.

    Returns:
        None.
    """
    for column in columns:
        values = df[column]
        # Swap every value that is not > 0 for 1 before taking the log:
        # ln(1) == 0, so those rows end up as 0 without ever evaluating the
        # logarithm of a non-positive number. Working on the whole column
        # at once also keeps the result dtype float in every case.
        df[column + '_log'] = np.log(values.where(values > 0, 1.0))
# Add the "<column>_log" columns to df and remember their names.
create_log_column(columns, df)
logcolumns = [name + '_log' for name in columns]

# Without any numeric column there is nothing to aggregate. Stop here with a
# clear message: everything below needs dff, so continuing would only fail
# later with a NameError.
if not logcolumns:
    raise SystemExit("Error: No numeric columns left for aggregation")

# One row per food category: mean of every log column, rounded to whole numbers.
dff = df.groupby(["harmonized single category"], as_index=False)[logcolumns].mean().round()
def create_cat_column(logcolumns, dff, reference=None):
    """Add a five-level label column for each log column of ``dff`` in place.

    For every name in ``logcolumns`` a new column ``<name>_%`` is written to
    ``dff``. Each value is compared with the 20/40/60/80 % quantiles of the
    same column in ``reference`` and labelled 'Very low', 'Low', 'Medium',
    'High' or 'Very high'. A value that is not below any threshold
    (including NaN) is labelled 'Very high'.

    Args:
        logcolumns: Names of the columns to classify; each must exist in
            both ``dff`` and ``reference``.
        dff: DataFrame holding the values to classify; modified in place.
        reference: DataFrame whose column quantiles are used as thresholds.
            Defaults to the module-level ``df``.

    Returns:
        None.
    """
    # NOTE(review): the Method text in the layout describes the scoring as
    # "between foodgroups", while the default thresholds come from the
    # product-level frame df - confirm which one is intended.
    if reference is None:
        reference = df

    labels = ('Very low', "Low", "Medium", "High")

    def label_for(value, limits):
        # The first threshold the value falls below decides the label.
        for label, limit in zip(labels, limits):
            if value < limit:
                return label
        return "Very high"

    for column in logcolumns:
        # Compute the thresholds once per column instead of once per row.
        limits = [reference[column].quantile(q) for q in (0.20, 0.40, 0.60, 0.80)]
        dff[column + '_%'] = dff[column].apply(lambda value: label_for(value, limits))
# Label every aggregated log column; this adds the "<column>_log_%" columns to dff.
create_cat_column(logcolumns, dff)

# Names of the label columns, derived from the original column names.
newcolumns = [name + '_log_%' for name in columns]

# Long format: one row per (food category, label column) pair.
dffm = pd.melt(dff, id_vars=['harmonized single category'], value_vars=newcolumns)

# Drop the trailing "_log_%" (6 characters) so the plot shows the plain column name.
dffm['variable'] = dffm['variable'].str[:-6]
# One dropdown entry per food category found in the aggregated data.
category_options = [
    {'label': category, 'value': category}
    for category in dff['harmonized single category'].unique()
]

# Single-select dropdown that drives the polar chart; starts on 'baby-food'.
mydropdown = dcc.Dropdown(
    id='mydropdown',
    options=category_options,
    value='baby-food',
)
# Stylesheet with the .dbc class to style dcc, DataTable and AG Grid components with a Bootstrap theme
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"
app = Dash(__name__, external_stylesheets=[dbc.themes.LUX, dbc.icons.FONT_AWESOME, dbc_css])

# Citation of the data source shown in the right-hand column.
# The misspelled "dio" key was corrected to "doi".
about_markdown = dcc.Markdown('''
### About the data
(Data)Source: GroceryDB,
title={Prevalence of processed foods in major US grocery stores},
author={Babak Ravandi and Gordana Ispirova and Michael Sebek and Peter
Mehler and Albert-László Barabási and Giulia Menichetti},
journal={Nature Food}
year={2025},
doi={10.1038/s43016-024-01095-7},
url = {https://www.nature.com/articles/s43016-024-01095-7}
''')

# Explanation of how the labels on the chart were derived.
method_markdown = dcc.Markdown('''
### Method
Input: values, like protein, cholesterol etc for approx 50K products you can buy in a grocery store,
divided into foodgroups.
Steps taken:
- convert all numbers into ln values (they ranged from very small to very high)
- per foodgroup I took the mean value
- I divided the values in between foodgroups into:
Very Low (<quantile(.20)),
Low (<quantile(.40)),
Medium (<quantile(.60)),
High (<quantile(.80)),
Very high (the rest.).
**In other words, the marks on the screen give an idea how a foodgroup scores amongst other foodgroups based on the mean value for all products in the selected foodgroup.**
''')

# Page layout: a title, then two columns. The left column holds the dropdown
# and the "visualarea" placeholder that the callback fills; the right column
# holds the two explanatory texts.
app.layout = dbc.Container([
    dbc.Row([
        html.H1('Am I going to eat ...... ?'),
        dbc.Col([
            html.Div([
                html.Div([
                    html.P(
                        'Select your foodgroup and get an idea of the ratio of some ingredients:',
                        style={'fontWeight': 'bold'},
                    ),
                    mydropdown,
                ]),
                html.Div(id="visualarea"),
            ])
        ], style={'padding': '1rem'}),
        dbc.Col([
            about_markdown,
            method_markdown,
        ], style={'padding': '1rem'})
    ], style={'marginTop': '4rem'})
], fluid=False)
@callback(
    Output("visualarea", "children"),
    Input("mydropdown", "value")
)
def update_multi_options(search_value):
    """Build the polar chart for the food category chosen in the dropdown.

    Args:
        search_value: Value of the ``mydropdown`` component, i.e. one entry
            of the 'harmonized single category' column, or None when the
            dropdown has been cleared.

    Returns:
        A ``dcc.Graph`` holding the polar scatter plot for the selected
        category, or an empty list when nothing is selected.
    """
    # A dcc.Dropdown can be cleared by the user (it is clearable unless
    # configured otherwise); there is nothing to plot in that case.
    if not search_value:
        return []

    # Keep only the rows of the selected food category.
    dffo = dffm.loc[dffm['harmonized single category'] == search_value].copy()

    # Labels from lowest to highest, and the marker size that goes with each.
    sorterList = ['Very low', 'Low', 'Medium', 'High', 'Very high']
    markerSize = [5, 10, 15, 20, 25]

    # Rank of each label; used both for sorting and for the marker size.
    dffo["sort"] = dffo['value'].apply(sorterList.index)
    dffo['markersize'] = dffo['sort'].apply(lambda position: markerSize[position])
    dffo = dffo.sort_values(by=['sort'])

    # Pin every label to a fixed colour. With only a colour sequence the
    # colours are handed out in order of appearance, so a label would change
    # colour whenever a lower label is absent from the selected category.
    palette = px.colors.sequential.Plasma
    colour_by_label = dict(zip(sorterList, palette))

    fig = px.scatter_polar(
        dffo, r='value', theta="variable",
        color="value", symbol="value", size='markersize',
        color_discrete_sequence=palette,
        color_discrete_map=colour_by_label,
    )
    return dcc.Graph(id='visual', figure=fig)
# Run the app (with debug enabled) only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    app.run(debug=True)