import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
import plotly.figure_factory as ff
import plotly.colors as pc
import plotly.graph_objects as go
def genSankey(df, cat_cols=None, value_cols='', c_scale='', region='', year='', shipmode=''):
    """Build a Plotly Sankey figure (as a plain dict) from categorical columns.

    Every adjacent pair of columns in ``cat_cols`` contributes one layer of
    links. The width of a link is the sum of ``value_cols`` over the rows
    sharing that (source, target) pair, rounded to two decimals. A label
    that occurs in more than one column is collapsed into a single node.

    Args:
        df: DataFrame containing the columns named in ``cat_cols`` and
            ``value_cols``.
        cat_cols: Ordered column names; at least two are required.
        value_cols: Name of the column summed into link widths.
        c_scale: Attribute name looked up on ``plotly.colors.sequential``.
        region: Selected region, used only in the title ('All' = no filter).
        year: Selected order year, used only in the title ('All' = no filter).
        shipmode: Selected ship mode, used only in the title
            ('All' = no filter).

    Returns:
        dict with ``data`` (a one-element list holding the sankey trace)
        and ``layout`` keys.

    Raises:
        ValueError: if fewer than two categorical columns are supplied.
    """
    cat_cols = list(cat_cols) if cat_cols is not None else []
    if len(cat_cols) < 2:
        raise ValueError('genSankey needs at least two categorical columns to build links')

    # Collect node labels in first-seen order. A set would make the order,
    # and therefore the colour given to each node, vary between runs.
    labelList = list(dict.fromkeys(
        label for col in cat_cols for label in df[col].unique().tolist()
    ))

    # Spread the labels evenly over the requested sequential colour scale.
    num_labels = len(labelList)
    color_scale = getattr(pc.sequential, c_scale)
    if num_labels > 1:
        sample_points = [i / (num_labels - 1) for i in range(num_labels)]
    else:
        # Zero or one label: avoid dividing by (num_labels - 1) == 0.
        sample_points = [0.0] * num_labels
    base_colors = pc.sample_colorscale(color_scale, sample_points) if sample_points else []
    color_dict = dict(zip(labelList, base_colors))

    # Stack every adjacent column pair into one source/target/count table.
    pair_frames = []
    for source_col, target_col in zip(cat_cols, cat_cols[1:]):
        pair = df[[source_col, target_col, value_cols]].copy()
        pair.columns = ['source', 'target', 'count']
        pair_frames.append(pair)
    sourceTargetDf = (
        pd.concat(pair_frames)
        .groupby(['source', 'target'], as_index=False)['count']
        .sum()
    )
    sourceTargetDf['count'] = sourceTargetDf['count'].round(2)

    # Translate labels to node indices with a dict lookup instead of a
    # linear list.index() scan per row.
    label_index = {label: position for position, label in enumerate(labelList)}
    sourceTargetDf['sourceID'] = sourceTargetDf['source'].map(label_index)
    sourceTargetDf['targetID'] = sourceTargetDf['target'].map(label_index)

    # Links take the colour of their source node at 50% opacity.
    link_colors = [
        color_dict[src].replace('rgb(', 'rgba(').replace(')', ', 0.5)')
        for src in sourceTargetDf['source']
    ]

    data = dict(
        type='sankey',
        node=dict(
            pad=15,
            thickness=20,
            line=dict(
                color="black",
                width=0.5
            ),
            label=labelList,
            color=[color_dict[label] for label in labelList]
        ),
        link=dict(
            source=sourceTargetDf['sourceID'].tolist(),
            target=sourceTargetDf['targetID'].tolist(),
            value=sourceTargetDf['count'].astype(float).tolist(),
            color=link_colors
        )
    )
    layout = dict(
        template='seaborn',
        title=dict(
            text=f'Sankey of {"all years" if year == "All" else year} sales data, '
                 f'for {"all regions" if region == "All" else "the " + region.lower() + " region"}, '
                 f'and {"all shipping modes" if shipmode == "All" else shipmode.lower() + " shipping mode"}',
        ),
        width=900,
        height=500,
        font=dict(color='#FFFFFF'),
        margin=dict(b=35, t=35, l=0, r=0),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)'
    )
    fig = dict(data=[data], layout=layout)
    return fig
def sales_map(df, selected_region, selected_year, selected_shipmode):
    """Return a hexbin mapbox figure of ``Sales`` for the given rows.

    ``df`` must provide ``Lat``, ``Long`` and ``Sales`` columns; ``Sales``
    is aggregated per hexagon with ``np.sum``. The three ``selected_*``
    arguments are only used to word the figure title, where the value
    'All' stands for an unfiltered dimension.
    """
    hexbin_fig = ff.create_hexbin_mapbox(
        data_frame=df,
        lat="Lat",
        lon="Long",
        nx_hexagon=20,
        opacity=0.6,
        color="Sales",
        agg_func=np.sum,
        labels={"color": "Sales"},
        mapbox_style='open-street-map',
        min_count=1,
        color_continuous_scale='plasma',
    )

    # Word each title fragment separately, then stitch them together.
    if selected_year == "All":
        year_text = "all years"
    else:
        year_text = selected_year
    if selected_region == "All":
        region_text = "all regions"
    else:
        region_text = "the " + selected_region.lower() + " region"
    if selected_shipmode == "All":
        shipmode_text = "all shipping modes"
    else:
        shipmode_text = selected_shipmode.lower() + " shipping mode"
    title_text = (
        f'Map of {year_text} sales data, '
        f'for {region_text}, '
        f'and {shipmode_text}'
    )

    hexbin_fig.update_layout(
        title=dict(text=title_text),
        width=900,
        height=500,
        font=dict(color='#FFFFFF'),
        margin=dict(b=35, t=35, l=0, r=0),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    return hexbin_fig
def sankey_figure(df, selected_region, selected_year, selected_shipmode):
    """Return the Segment -> State/Province -> Category Sankey for ``df``.

    Link widths are the summed ``Sales`` values; ``genSankey`` does the
    aggregation per source/target pair itself, so ``df`` is passed through
    as-is. The ``selected_*`` arguments are forwarded for the figure title.
    """
    # Previously a full groupby/sum ran here only to read these three
    # column names back from its result; the grouped frame was never used.
    cat_columns = ['Segment', 'State/Province', 'Category']
    return genSankey(
        df,
        cat_cols=cat_columns,
        value_cols='Sales',
        c_scale='Darkmint',
        region=selected_region,
        year=selected_year,
        shipmode=selected_shipmode,
    )
# Read the Superstore export and derive the order year used by the year filter.
data_df = pd.read_csv('Superstore.csv')
parsed_order_dates = pd.to_datetime(data_df['Order Date'], dayfirst=True)
data_df['Order Date'] = parsed_order_dates
data_df['order_year'] = parsed_order_dates.dt.year

# Dropdown choices: every list starts with the catch-all 'All' entry.
regions = ['All', *sorted(data_df['Region'].unique())]

# Years are listed newest first.
orderyears = sorted(data_df['order_year'].unique(), reverse=True)
orderyear_options = [{'label': 'All', 'value': 'All'}]
orderyear_options.extend(
    {'label': str(year_value), 'value': year_value} for year_value in orderyears
)

shipmodes = ['All', *sorted(data_df['Ship Mode'].unique())]
# Create the Dash application, styled with the Bootstrap theme from
# dash-bootstrap-components.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Colour palette shared by the layout (hex colour strings keyed by role).
custom_theme = dict(
    primary="#75C9BE",
    secondary="#d6e8e7",
    info="#3B9ECB",
    gray="#adb5bd",
    success="#8BE3AA",
    warning="#F9F871",
    danger="#c0003e",
    body_bg="#1F5869",
    content_bg="#153F4C",
    text_color="#CBE2E2",
)
# Define the layout
def _padded_col(children, width):
    """Wrap ``children`` in a column with the spacing shared by all filter and graph columns."""
    return dbc.Col(
        children,
        width=width,
        className="my-4",
        style={'padding': '0 5px'},
    )


def _filter_col(label_text, dropdown_id, options):
    """Build one labelled filter dropdown column, preselected to 'All'."""
    return _padded_col(
        [
            html.Label(label_text, style={'color': custom_theme["text_color"]}),
            dcc.Dropdown(id=dropdown_id, options=options, value='All'),
        ],
        width=4,
    )


app.layout = dbc.Container(
    [
        # Page heading.
        dbc.Row(
            dbc.Col(
                html.H1(
                    'Figure Friday - Week 28 - Sample Superstore Data',
                    style={'textAlign': 'center', 'color': custom_theme["text_color"]}
                ),
                width=12
            ),
            justify='center'
        ),
        # Filter row: region, order year and shipping mode.
        dbc.Row(
            [
                _filter_col(
                    'Select a Region',
                    'region-filter',
                    [{'label': region, 'value': region} for region in regions],
                ),
                _filter_col(
                    'Select an Order Year',
                    'sales_year-filter',
                    orderyear_options,
                ),
                _filter_col(
                    'Select a Shipping Mode',
                    'ship_mode-filter',
                    [{'label': shipmode, 'value': shipmode} for shipmode in shipmodes],
                ),
            ],
            justify='center',
            # Dash style dicts use camelCased keys (marginBottom, not margin-bottom).
            style={'marginBottom': '20px'}
        ),
        # Figure row: hexbin map on the left, Sankey on the right.
        dbc.Row(
            [
                _padded_col(dcc.Graph(id='sales_graph1'), width=6),
                _padded_col(dcc.Graph(id='sankey'), width=6),
            ]
        )
    ],
    fluid=True,
    style={'backgroundColor': custom_theme["body_bg"]}
)
# Define callbacks to update figures based on selected filters
@app.callback(
    [Output('sales_graph1', 'figure'),
     Output('sankey', 'figure')],
    [Input('region-filter', 'value'),
     Input('sales_year-filter', 'value'),
     Input('ship_mode-filter', 'value')]
)
def update_figures(selected_region, selected_year, selected_shipmode):
    """Rebuild the map and Sankey figures for the current filter selection.

    Each argument is the value of one filter dropdown. 'All' (or a cleared
    dropdown) leaves that dimension unfiltered.

    Returns:
        Tuple ``(sales_map_fig, sankey_fig)`` matching the two outputs.
    """
    # A dropdown the user has cleared reports None. Treat that as 'All' so
    # the data is not filtered down to nothing and the title builders
    # downstream never call .lower() on None.
    selected_region, selected_year, selected_shipmode = (
        'All' if choice is None else choice
        for choice in (selected_region, selected_year, selected_shipmode)
    )

    filtered_df = data_df
    active_filters = (
        ('Region', selected_region),
        ('order_year', selected_year),
        ('Ship Mode', selected_shipmode),
    )
    for column, choice in active_filters:
        if choice != 'All':
            filtered_df = filtered_df[filtered_df[column] == choice]

    sales_map_fig = sales_map(filtered_df, selected_region, selected_year, selected_shipmode)
    sankey_fig = sankey_figure(filtered_df, selected_region, selected_year, selected_shipmode)
    return sales_map_fig, sankey_fig
if __name__ == '__main__':
    # ``run_server`` is the legacy entry point and is not available on newer
    # Dash releases; prefer ``run`` and only fall back when the installed
    # Dash predates it.
    launch = app.run if hasattr(app, 'run') else app.run_server
    launch(debug=True)