Py.Cafe

marie-anne/

2025-figurefriday-w13

Nutrition Analysis Visualizer using Dash and Plotly (AI speaking)

DocsPricing
  • GroceryDB_foods.csv
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 28 21:06:16 2025

@author: win11
"""

from dash import Dash, dcc, callback, Output, Input, html
import pandas as pd
#import plotly.graph_objs as go
import plotly.express as px
import numpy as np
import dash_bootstrap_components as dbc

# Download CSV sheet at: https://drive.google.com/file/d/1EoFTpSJOIYmVzemoMLj7vMTqeM1zMy0o/view?usp=sharing
df = pd.read_csv('GroceryDB_foods.csv')

# About 50,000 data points is too much for this visual, so only the numeric
# columns of interest are kept; they are averaged per category further down.
# Every column named in columns_del is left out.
columns_del = ['name', 'store', 'brand', 'price', 'price percal', 'package_weight', 'Iron', 'Vitamin C', 'Total Vitamin A']

columns = [
    col
    for col in df.select_dtypes(include=['number']).columns
    if col not in columns_del
]


# Convert the values in these columns with np.log (natural log) so they are more comparable; log10 was too flat.


def create_log_column(columns, df):
    """Add a natural-log version of each listed column to ``df`` in place.

    For every name in ``columns`` a new column ``<name>_log`` is written to
    ``df``. Strictly positive values become ``np.log(value)``; every other
    value (zero, negative or missing) becomes 0.

    Parameters
    ----------
    columns : iterable of str
        Names of numeric columns present in ``df``.
    df : pandas.DataFrame
        Frame to extend; it is modified in place.

    Returns
    -------
    None
    """
    for column in columns:
        values = df[column]
        # Substituting 1 for every entry that is not > 0 lets one vectorised
        # np.log call yield the required 0 (log(1) == 0) without evaluating
        # the logarithm outside its domain and without a per-row Python call.
        df[column + '_log'] = np.log(values.where(values > 0, 1))

        


create_log_column(columns, df)

# Names of the log columns that create_log_column just added
logcolumns = [column + '_log' for column in columns]

# Everything below needs dff. With no numeric column left there is nothing to
# aggregate, so stop here with a clear message instead of printing it and
# failing later on an undefined name.
if not logcolumns:
    raise ValueError("Error: No numeric columns left for aggregation")

# Mean log value per food category; round() with no argument rounds the means
# to whole numbers.
dff = df.groupby(["harmonized single category"], as_index=False)[logcolumns].mean().round()




# Band labels in ascending order; values below no boundary get "Very high".
_BAND_LABELS = ('Very low', 'Low', 'Medium', 'High')
_BAND_QUANTILES = [0.20, 0.40, 0.60, 0.80]


def _quantile_band(value, bounds):
    """Return the label of the first band whose upper bound exceeds ``value``."""
    for bound, label in zip(bounds, _BAND_LABELS):
        if value < bound:
            return label
    return "Very high"


def create_cat_column(logcolumns, dff):
    """Label each value of the listed columns with its quantile band, in place.

    For every name in ``logcolumns`` a new column ``<name>_%`` is written to
    ``dff`` holding one of 'Very low', 'Low', 'Medium', 'High' or 'Very high'.
    The band boundaries are the 20 %, 40 %, 60 % and 80 % quantiles of that
    same column of ``dff``, so a row is ranked against the other rows of the
    frame that was passed in rather than against any module-level data.

    A value strictly below a boundary falls in that band; a value that is
    below none of them (including a missing value) is labelled 'Very high'.

    Parameters
    ----------
    logcolumns : iterable of str
        Names of numeric columns present in ``dff``.
    dff : pandas.DataFrame
        Frame to extend; it is modified in place.

    Returns
    -------
    None
    """
    for column in logcolumns:
        # The boundaries are identical for every row, so compute them once
        # per column instead of once per row and comparison.
        bounds = dff[column].quantile(_BAND_QUANTILES).tolist()
        dff[column + '_%'] = dff[column].apply(_quantile_band, args=(bounds,))





create_cat_column(logcolumns, dff)

# Names of the band columns: original column name + "_log_%"
newcolumns = [column + '_log_%' for column in columns]

# Long format: one row per (food category, band column) pair
dffm = pd.melt(
    dff,
    id_vars=['harmonized single category'],
    value_vars=newcolumns,
)

# Cut the trailing "_log_%" (6 characters) so the plot shows the plain name
dffm['variable'] = dffm['variable'].str[:-6]




# Single-select dropdown with one entry per food category; the label shown
# and the value sent to the callback are the same text.
mydropdown = dcc.Dropdown(
    id='mydropdown',
    options=[
        dict(label=category, value=category)
        for category in dff['harmonized single category'].unique()
    ],
    value='baby-food',
)

# Stylesheet providing the .dbc class, which styles dcc, DataTable and AG Grid
# components with a Bootstrap theme
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"


app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.LUX, dbc.icons.FONT_AWESOME, dbc_css],
)

# Page layout: one row with a title and two columns.
app.layout = dbc.Container([

    dbc.Row([
        html.H1('Am I going to eat ...... ?'),

        # Left column: category selector plus the area the callback fills
        # with the chart (the Div with id "visualarea").
        dbc.Col([
            html.Div([
                html.Div([html.P('Select your foodgroup and get an idea of the ratio of some ingredients:',style={'fontWeight':'bold'}), mydropdown]),
                html.Div(id="visualarea")])

            ], style={'padding':'1rem'}),

        # Right column: data source citation and a description of the method.
        dbc.Col([

            # Citation text; the identifier key is spelled "doi".
            dcc.Markdown('''
              ### About the data
                         
              (Data)Source: GroceryDB,   
                         
              title={Prevalence of processed foods in major US grocery stores},      
              
              author={Babak Ravandi and Gordana Ispirova and Michael Sebek and Peter 
                      Mehler and Albert-László Barabási and Giulia Menichetti},    
              
              journal={Nature Food},    
              
              year={2025},    
              
              doi={10.1038/s43016-024-01095-7},    
              
              url = {https://www.nature.com/articles/s43016-024-01095-7}
          '''),

            dcc.Markdown('''

                         
                         ### Method
                         Input: values, like protein, cholesterol etc for approx 50K products you can buy in a grocery store,
                         divided into foodgroups.
                         
                         Steps taken:
                         - convert all numbers into ln values (they ranged from very small to very high)
                         - per foodgroup I took the mean value
                         - I divided the values in between foodgroups into:
                             Very Low (<quantile(.20)),
                             Low (<quantile(.40)),
                             Medium (<quantile(.60)),
                             High (<quantile(.80)),
                             Very high (the rest.).
                         
                        **In other words, the marks on the screen give an idea how a foodgroup scores amongst other foodgroups based on the mean value for all products in the selected foodgroup.**   
                         
                         
                         
                         
                         '''),

            ], style={'padding':'1rem'})

        ], style={'marginTop': '4rem'})

    ], fluid=False)
  



@callback(
    Output("visualarea", "children"),
    Input("mydropdown", "value")
)
def update_multi_options(search_value):
    """Build the polar chart for the food category chosen in the dropdown.

    The rows of ``dffm`` whose 'harmonized single category' equals
    ``search_value`` are plotted with one marker per 'variable' entry: the
    angle is the variable, the radius is its band label, and marker size,
    colour and symbol all encode the same band.

    Parameters
    ----------
    search_value : str
        Current value of the "mydropdown" component.

    Returns
    -------
    dcc.Graph
        Graph with id 'visual', placed into the "visualarea" Div.
    """
    # Bands from lowest to highest; the position in this list is the rank
    # used both for sorting and for picking the marker size.
    sorterList = ['Very low', 'Low', 'Medium', 'High', 'Very high']
    markerSize = [5, 10, 15, 20, 25]

    # Filter on the selected food category (copy, so the columns added below
    # never touch dffm)
    dffo = dffm.loc[dffm['harmonized single category'] == search_value].copy()

    # Look every label up once and reuse the rank for size and sort order
    rank = dffo['value'].apply(sorterList.index)
    dffo['markersize'] = rank.apply(lambda position: markerSize[position])
    dffo['sort'] = rank

    # Sorting by rank makes the bands appear from lowest to highest
    dffo = dffo.sort_values(by=['sort'])

    # Explicit maps pin each band to one colour and one symbol. With plain
    # sequences they are handed out in order of appearance, so a category
    # lacking e.g. 'Very low' would show 'Low' in the 'Very low' colour.
    # zip() stops after the five bands, i.e. the first five Plasma colours.
    band_colors = dict(zip(sorterList, px.colors.sequential.Plasma))
    band_symbols = dict(zip(sorterList, ["circle", "diamond", "square", "x", "cross"]))

    fig = px.scatter_polar(
        dffo, r='value', theta="variable",
        color="value", symbol="value", size='markersize',
        color_discrete_map=band_colors,
        symbol_map=band_symbols,
    )

    return dcc.Graph(id='visual', figure=fig)

    
    
    


# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # NOTE(review): debug=True is presumably meant for local development only;
    # confirm before deploying.
    app.run(debug=True)