import pandas as pd
from dash import Dash, html, dcc, Input, Output, callback
import dash_bootstrap_components as dbc
import numpy as np
import plotly.express as px
# Sample-level measurements, read as a tab-separated file from the
# Figure Friday repository.
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2025/week-2/samples.tsv",
    sep='\t',
)

# Rename two blinded_name values ('Tablet' -> 'Tablets' and
# 'Protein Bar' -> 'Protein bar'); presumably these are spelling variants of
# the same category that should end up in one group.
for variant, canonical in (('Tablet', 'Tablets'), ('Protein Bar', 'Protein bar')):
    df.loc[df['blinded_name'] == variant, 'blinded_name'] = canonical

# Chemical metadata from a local file; its 'Chemical Group' column feeds the
# chemical-group dropdown and the grouping of the chart cards.
df_chemicals = pd.read_csv("chemicals.csv")
# Data notes: 117 distinct blinded_name values, 618 samples.
# Per blinded_name we want the quartile spread of the ng/g percentile scores.

# Percentile columns expressed per gram, skipping the "equivalents" variants.
percentile_cols = [
    col for col in df.columns
    if 'percentile' in col and '_g' in col and 'equivalents' not in col
]

# Result markers that are copied through unchanged instead of being bucketed.
_NON_NUMERIC_RESULTS = ('<LOQ', 'NO RESULT')


def _percentile_to_quartile(value):
    """Map one raw percentile cell to a result label.

    '<LOQ' and 'NO RESULT' are returned as-is. Every other value is parsed
    as a number, truncated to an integer and bucketed:
    > 75 -> 'Q4', > 50 -> 'Q3', > 25 -> 'Q2', >= 0 -> 'Q1'.
    Negative, missing (NaN) or unparseable values yield 'Error' instead of
    raising, so a single odd cell cannot abort the whole load.
    """
    if value in _NON_NUMERIC_RESULTS:
        return value
    try:
        # float() first so that strings such as '42.0' parse too; int() then
        # truncates exactly like the plain int(x) did for numeric input.
        percentile = int(float(value))
    except (TypeError, ValueError, OverflowError):
        return 'Error'
    if percentile > 75:
        return 'Q4'
    if percentile > 50:
        return 'Q3'
    if percentile > 25:
        return 'Q2'
    if percentile >= 0:
        return 'Q1'
    return 'Error'


# Start the working frame from the identifying (non-percentile) columns ...
df_data = df[['id', 'product_id', 'product', 'blinded_name']].copy()
# ... and add one bucketed column per ng/g percentile column.
for percentile_col in percentile_cols:
    df_data[percentile_col] = df[percentile_col].apply(_percentile_to_quartile)
# Helpers that turn the bucketed columns into values usable in a visual
def convert_percentiles_into_dict(a):
    """Count how often each result category occurs in a bucketed column.

    Args:
        a: pandas Series of result labels ('<LOQ', 'Q1', 'Q2', 'Q3', 'Q4',
            possibly others).

    Returns:
        dict with the keys '<LOQ', 'Q1', 'Q2', 'Q3', 'Q4' (in that order)
        mapped to plain ``int`` counts. Categories absent from ``a`` map to
        0; any other label in ``a`` (e.g. 'NO RESULT') is not reported.
    """
    # Tally once and look every category up in the same result.
    tallies = a.value_counts()
    return {
        label: int(tallies.get(label, 0))
        for label in ('<LOQ', 'Q1', 'Q2', 'Q3', 'Q4')
    }
def create_productlist_fromcol(a):
    """Return the distinct values of ``a`` as a list.

    Args:
        a: pandas Series (or any object exposing ``unique()``).

    Returns:
        list built from ``a.unique()``; pandas returns uniques in order of
        appearance.
    """
    # The former debug print of the unique values was removed: it wrote to
    # stdout on every call and computed unique() a second time.
    return list(a.unique())
# One summary row per blinded_name: how many samples, how many distinct
# products, the product names joined into one string, and for every chemical
# a dict counting the '<LOQ' / Q1..Q4 results.

# Chemical codes; each has a '<code>_percentile_ng_g' input column and
# produces a '<code>_results' output column, in this order.
_RESULT_CHEMICALS = (
    'DEHP', 'DBP', 'BBP', 'DINP', 'DIDP', 'DEP', 'DMP', 'DIBP', 'DNHP',
    'DCHP', 'DNOP', 'BPA', 'BPS', 'BPF', 'DEHT', 'DEHA', 'DINCH', 'DIDA',
)

_summary_aggs = {
    'number_of_samples': pd.NamedAgg(column='id', aggfunc='count'),
    'number_of_different_products': pd.NamedAgg(
        column='product_id', aggfunc=lambda ids: ids.nunique()
    ),
    'productlist': pd.NamedAgg(
        column='product', aggfunc=lambda names: ', '.join(names.unique())
    ),
}
_summary_aggs.update(
    (
        f'{chem}_results',
        pd.NamedAgg(
            column=f'{chem}_percentile_ng_g',
            aggfunc=lambda labels: convert_percentiles_into_dict(labels),
        ),
    )
    for chem in _RESULT_CHEMICALS
)

df_grouped_blinded_names = (
    df_data.groupby(['blinded_name']).agg(**_summary_aggs).reset_index()
)
# Build the cards for the grid: a product list plus one categorical bar chart per chemical (categories: <LOQ and quartiles Q1-Q4)
def create_productlist_card(selected_food_group):
    """Build the grid column holding the disclaimer and the product list.

    Args:
        selected_food_group: blinded_name value to look up in the
            module-level ``df_grouped_blinded_names``.

    Returns:
        A ``dbc.Col`` wrapping a ``dbc.Card`` with the disclaimer texts, a
        link to the data source and a ``dbc.ListGroup`` (id "productlist")
        with one item per product.

    Raises:
        ValueError: raised by ``.item()`` when the food group matches zero or
            more than one summary row.
    """
    small_bold = {"fontWeight": "bold", "fontSize": "11px"}

    joined_products = df_grouped_blinded_names.loc[
        df_grouped_blinded_names['blinded_name'] == selected_food_group,
        'productlist',
    ].item()
    # The summary joins product names with ', ', so split on that same
    # separator; splitting on ',' alone left a leading space on every item
    # but the first. NOTE: a product name that itself contains ', ' is still
    # split into several items.
    product_names = [
        name.strip() for name in str(joined_products).split(', ') if name.strip()
    ]
    list_items = [dbc.ListGroupItem(name) for name in product_names]

    productlist_card = dbc.Col(
        dbc.Card([
            html.P(
                'Disclaimer: this app offers a way to look at the data collected and analysed by plasticlist.org. Visuals are based on percentiles nanogram/gram. ChatGPT offered me 5 free tweet length descriptions for a chemical.',
                style=small_bold,
            ),
            html.P(
                'Do not draw any conclusions from here, go to plasticlist.org to read all about the research.',
                style=small_bold,
            ),
            # The href used to point at the misspelled domain "plastlist.org".
            html.A(
                "Plasticlist.org",
                href='https://www.plasticlist.org/',
                target="_blank",
                style=small_bold,
            ),
            html.P(
                "PlasticList. 'Data on Plastic Chemicals in Bay Area Foods'. plasticlist.org. Accessed Jan 10, 2025.",
                style=small_bold,
            ),
            html.H3("Productlist:", style={"marginTop": "1rem"}),
            dbc.ListGroup(
                id="productlist",
                children=list_items,
                flush=True,
                style={"fontSize": "10px"},
            ),
        ]),
        className='col-lg-3 col-md-6 col-sm-12',
    )
    return productlist_card
def create_chemical_card(dfin, food_group, chemical):
    """Build one grid column with the result bar chart for a chemical.

    Args:
        dfin: summary frame with a 'blinded_name' column, a
            'number_of_samples' column and a '<chemical>_results' column
            holding a category -> count dict.
        food_group: blinded_name value selecting the row of ``dfin``.
        chemical: chemical code; used for the card title, the results column
            name, the graph id and the description lookup in the
            module-level ``df_chemicals``.

    Returns:
        A ``dbc.Col`` wrapping a ``dbc.Card`` with the title, the (possibly
        empty) description and a ``dcc.Graph`` with id '<chemical>_plot'.

    Raises:
        ValueError: raised by ``.item()`` when ``food_group`` matches zero or
            more than one row of ``dfin``.
        KeyError: when ``dfin`` has no '<chemical>_results' column.
    """
    # Description is optional: a chemical without a row, or with an empty
    # cell (read by pandas as NaN), gets no text. The previous
    # len(str(desc)) == 0 check never matched NaN and would render "nan".
    descriptions = df_chemicals.loc[
        df_chemicals['Chemical'] == chemical, 'Description'
    ]
    desc = descriptions.iloc[0] if len(descriptions) else None
    if pd.isna(desc):
        desc = ''

    # Select the summary row once; .item() unwraps the single cell.
    row = dfin.loc[dfin['blinded_name'] == food_group]
    result_counts = row[chemical + '_results'].item()
    # The y-axis is capped by the number of samples for this blinded_name so
    # every card of a food group shares the same scale.
    y_max = row['number_of_samples'].item()

    # Fixed colour per result category.
    color_map = {
        '<LOQ': 'darkgrey',
        'Q1': '#F4E5CC',
        'Q2': '#eacb99',
        'Q3': '#e0b166',
        'Q4': '#d69732',
    }
    x_vals = list(result_counts.keys())
    y_vals = list(result_counts.values())

    fig = px.bar(x=x_vals, y=y_vals, color=x_vals, color_discrete_map=color_map)
    fig.update_layout(
        showlegend=False,
        margin=dict(l=10, r=10, t=10, b=10),
        xaxis=dict(title=dict(text="Result")),
        yaxis=dict(title=None),
    )
    fig.update_yaxes(range=[0, y_max + 1])

    card_chemical = dbc.Col(
        dbc.Card([
            html.H3(chemical),
            # camelCase key, consistent with the other style dicts in this file.
            html.P(desc, style={"fontSize": "11px"}),
            dcc.Graph(id=str(chemical) + "_plot", figure=fig, style={'height': '350px'}),
        ]),
        className='col-lg-3 col-md-6 col-sm-12',
    )
    return card_chemical
def bar_chart_grid(dfin, selected_chemical_group, df_chemicals, food_group):
    """Assemble all cards shown in the grid for one food group.

    Args:
        dfin: summary frame passed on to ``create_chemical_card``.
        selected_chemical_group: value matched against the 'Chemical Group'
            column of ``df_chemicals``.
        df_chemicals: frame with 'Chemical Group' and 'Chemical' columns.
        food_group: blinded_name value the cards are built for.

    Returns:
        list whose first element is the product-list card, followed by one
        chemical card per chemical of the group in sorted order.
    """
    in_group = df_chemicals['Chemical Group'] == selected_chemical_group
    group_chemicals = sorted(df_chemicals.loc[in_group]['Chemical'])
    return [
        create_productlist_card(food_group),
        *(create_chemical_card(dfin, food_group, name) for name in group_chemicals),
    ]
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"
app = Dash(__name__, external_stylesheets=[dbc.themes.SANDSTONE, dbc_css])

# Initial selections. The dropdown defaults and the first rendering of the
# grid share these values so the page does not briefly show cards for a
# different food/chemical group than the dropdowns display.
DEFAULT_FOOD_GROUP = 'Almond milk'
DEFAULT_CHEMICAL_GROUP = 'Phthalates'

# Fixed header bar: title on the left, the two filter dropdowns on the right.
_header_row = dbc.Row(
    [
        html.Div([html.H2(id="h2header")], className='col-md-7'),
        html.Div(
            [
                dcc.Dropdown(
                    id='dropdown_food',
                    options=[
                        {'label': i, 'value': i}
                        for i in df_grouped_blinded_names['blinded_name'].unique()
                    ],
                    multi=False,
                    value=DEFAULT_FOOD_GROUP,
                    placeholder='Filter food category...',
                )
            ],
            className="col-md-3",
        ),
        html.Div(
            [
                dcc.Dropdown(
                    id='dropdown_chemicalgroups',
                    options=[
                        {'label': i, 'value': i}
                        for i in df_chemicals['Chemical Group'].unique()
                    ],
                    multi=False,
                    value=DEFAULT_CHEMICAL_GROUP,
                    placeholder='Filter chemical group...',
                )
            ],
            className='col-md-2',
        ),
    ],
    style={
        "position": "fixed", "top": "0", "left": "0", "width": "100%", "height": "100px",
        "backgroundColor": "lightblue", "zIndex": "1000", "margin": "0",
        "padding": "1rem", "display": "flex", "alignItems": "center",
    },
)

# Card grid; the top margin keeps it clear of the fixed 100px header.
_grid_row = dbc.Row(
    [
        dbc.Col(
            [
                html.Div(
                    id="bargrid",
                    style={"display": "flex", "flexWrap": "wrap"},
                    children=bar_chart_grid(
                        df_grouped_blinded_names,
                        DEFAULT_CHEMICAL_GROUP,
                        df_chemicals,
                        DEFAULT_FOOD_GROUP,
                    ),
                )
            ],
            className='col-md-12',
        )
    ],
    style={"marginTop": "120px"},
)

# TODO: add a link to https://www.plasticlist.org/ in this row.
_footer_row = dbc.Row(
    [html.P('Data: PlasticList. "Data on Plastic Chemicals in Bay Area Foods". plasticlist.org. Accessed Jan 10, 2025.')]
)

app.layout = dbc.Container(
    [_header_row, _grid_row, _footer_row],
    style={"marginTop": "2rem"},
)
@app.callback(
    Output('bargrid', 'children'),
    Output('h2header', 'children'),
    Input(component_id='dropdown_food', component_property='value'),
    Input(component_id='dropdown_chemicalgroups', component_property='value'),
)
def update_job_data(selected_food_group, selected_chemical_group):
    """Rebuild the card grid and the header when a dropdown changes.

    Args:
        selected_food_group: value of the food dropdown (a blinded_name), or
            None when the dropdown has been cleared.
        selected_chemical_group: value of the chemical-group dropdown.

    Returns:
        Tuple ``(cards, header_text)`` for the 'bargrid' children and the
        'h2header' children.

    Raises:
        PreventUpdate: when no food group is selected, so the page keeps its
            current content instead of failing on the empty row lookup.
    """
    from dash.exceptions import PreventUpdate

    # A cleared dropdown sends None; .item() below would raise ValueError on
    # the resulting empty selection.
    if not selected_food_group:
        raise PreventUpdate

    newgrid = bar_chart_grid(
        df_grouped_blinded_names, selected_chemical_group, df_chemicals, selected_food_group
    )

    # Look the summary row up once and read both counters from it.
    row = df_grouped_blinded_names.loc[
        df_grouped_blinded_names['blinded_name'] == selected_food_group
    ]
    samples = str(row['number_of_samples'].item())
    products = str(row['number_of_different_products'].item())
    h2 = f"{selected_food_group}: {samples} sample(s), {products} different product(s)"
    return newgrid, h2
# Start the development server only when the file is executed directly, so
# importing the module (e.g. from a WSGI server) does not launch it.
# app.run replaces app.run_server, which current Dash releases no longer accept.
if __name__ == "__main__":
    app.run(debug=True)