PyCafe - Dash - Dash Color Picker Demo

assets/
Dallas_Animal_Shelter_Data_Fiscal_Year_Jan_2024.csv
app.py
dallas_animal_shelter.py
requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import pandas as pd
import plotly.express as px
from sksurv.nonparametric import kaplan_meier_estimator
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc


# CSV file read by load_and_preprocess_data().
DATA_FILE = 'Dallas_Animal_Shelter_Data_Fiscal_Year_Jan_2024.csv'

# Raw Outcome_Type values, grouped by the category they are collapsed into.
# 'Exit' is the category that load_and_preprocess_data() flags as the event.
_EXIT_OUTCOMES = (
    'ADOPTION', 'RETURNED TO OWNER', 'TRANSFER', 'FOSTER', 'DISPOSAL',
    'TNR', 'WILDLIFE', 'SNR',
)
_STAY_OUTCOMES = (
    'EUTHANIZED', 'DIED', 'LOST EXP', 'FOUND EXP', 'TREATMENT', 'MISSING',
)
OUTCOME_MAPPING = {
    **dict.fromkeys(_EXIT_OUTCOMES, 'Exit'),
    **dict.fromkeys(_STAY_OUTCOMES, 'Stay'),
}

# Number of most frequent breeds kept for the dashboard.
TOP_BREEDS_COUNT = 20

def load_and_preprocess_data():
    """Read the shelter CSV and prepare the dog records used by the dashboard.

    Steps, in order:
      1. Read ``DATA_FILE`` and keep rows whose ``Animal_Type`` is ``'DOG'``.
      2. Add ``Outcome_Category`` by replacing ``Outcome_Type`` values through
         ``OUTCOME_MAPPING`` (values not in the mapping are left as they are).
      3. Build intake/outcome timestamps from the date and time columns and
         derive ``Duration`` as the difference expressed in days.
      4. Add the boolean ``Event`` column (``Outcome_Category == 'Exit'``).
      5. Drop rows whose duration is missing or negative.
      6. Keep only the ``TOP_BREEDS_COUNT`` most frequent breeds.

    Returns:
        A ``(df_top_breeds, top_breeds)`` tuple: the filtered DataFrame and the
        pandas Index of the retained breed names, most frequent first.
    """
    seconds_per_day = 60 * 60 * 24

    raw = pd.read_csv(DATA_FILE)
    # Work on a copy to avoid pandas warnings when columns are added below.
    dogs = raw.loc[raw['Animal_Type'] == 'DOG'].copy()

    dogs['Outcome_Category'] = dogs['Outcome_Type'].replace(OUTCOME_MAPPING)

    intake_text = dogs['Intake_Date'] + ' ' + dogs['Intake_Time']
    outcome_text = dogs['Outcome_Date'] + ' ' + dogs['Outcome_Time']
    dogs['Intake_DateTime'] = pd.to_datetime(intake_text)
    dogs['Outcome_DateTime'] = pd.to_datetime(outcome_text)

    elapsed = dogs['Outcome_DateTime'] - dogs['Intake_DateTime']
    dogs['Duration'] = elapsed.dt.total_seconds() / seconds_per_day
    dogs['Event'] = dogs['Outcome_Category'].eq('Exit')

    # Keep only rows with a known, non-negative length of stay.
    has_valid_duration = dogs['Duration'].notna() & dogs['Duration'].ge(0)
    dogs = dogs.loc[has_valid_duration]

    breed_frequency = dogs['Animal_Breed'].value_counts()
    top_breeds = breed_frequency.nlargest(TOP_BREEDS_COUNT).index
    df_top_breeds = dogs.loc[dogs['Animal_Breed'].isin(top_breeds)]

    return df_top_breeds, top_breeds

# Load the data once at import time; the plotting helpers and the callback
# below read these module-level objects.
df_top_breeds, top_breeds = load_and_preprocess_data()

# Fixed breed -> colour assignment so every chart colours a breed the same way.
# The D3 palette is reused cyclically when there are more breeds than colours.
_D3_PALETTE = px.colors.qualitative.D3
breed_colors = {
    breed: _D3_PALETTE[position % len(_D3_PALETTE)]
    for position, breed in enumerate(top_breeds)
}


def calculate_survival_curves(breeds):
    """Compute a Kaplan-Meier curve for each requested breed.

    For every breed, the matching rows of the module-level ``df_top_breeds``
    are passed to ``kaplan_meier_estimator`` with ``Event`` as the event
    indicator and ``Duration`` as the time.

    NOTE(review): per the scikit-survival docs the estimator returns the
    probability that the event has NOT yet occurred by each time point;
    confirm this matches how the curve is labelled in the UI.

    Args:
        breeds: Iterable of breed names. ``None`` is treated as empty.

    Returns:
        A long-format DataFrame with columns ``Time``,
        ``Survival Probability`` and ``Breed``. If ``breeds`` is empty or no
        requested breed has any rows, an empty DataFrame with those columns
        is returned instead of raising.
    """
    columns = ['Time', 'Survival Probability', 'Breed']
    if breeds is None:
        breeds = []

    survival_data = []
    for breed in breeds:
        breed_data = df_top_breeds[df_top_breeds['Animal_Breed'] == breed]
        if breed_data.empty:
            # Nothing to estimate for this breed; skip it rather than hand
            # the estimator an empty sample.
            continue
        time, survival_prob = kaplan_meier_estimator(breed_data['Event'].astype(bool), breed_data['Duration'])
        survival_data.append(pd.DataFrame({'Time': time, 'Survival Probability': survival_prob, 'Breed': breed}))

    if not survival_data:
        # pd.concat raises ValueError when given no objects.
        return pd.DataFrame(columns=columns)
    return pd.concat(survival_data)


def create_survival_plot(survival_df):
    """Draw one line per breed from a long-format survival DataFrame.

    Args:
        survival_df: DataFrame with ``Time``, ``Survival Probability`` and
            ``Breed`` columns. May be empty.

    Returns:
        A Plotly figure. For an empty frame, a blank line chart titled
        "No data to show" is returned.
    """
    if survival_df.empty:
        return px.line(title="No data to show")

    def _arial_black(size):
        # All titles share the same family/colour and differ only in size.
        return dict(size=size, family='Arial', color='black')

    # NOTE(review): the y-axis is labelled "% Exit Probability" while the
    # plotted column is named "Survival Probability" - confirm the intended
    # wording.
    axis_labels = {'Time': 'Days', 'Survival Probability': '% Exit Probability'}

    fig = px.line(
        survival_df,
        x='Time',
        y='Survival Probability',
        color='Breed',
        color_discrete_map=breed_colors,
        markers=True,
        labels=axis_labels,
        line_shape="spline",
        template='plotly_white',
    )
    fig.update_layout(
        title_font=_arial_black(20),
        xaxis_title_font=_arial_black(14),
        yaxis_title_font=_arial_black(14),
        legend_title_font=_arial_black(16),
    )
    return fig

def create_breed_count_plot(filtered_df):
    """Build a bar chart of how many rows each breed has.

    Args:
        filtered_df: DataFrame with an ``Animal_Breed`` column.

    Returns:
        A Plotly bar figure with one bar per breed, coloured through
        ``breed_colors``, with the count printed on each bar, the y-axis
        hidden and no legend.
    """
    breed_counts = filtered_df['Animal_Breed'].value_counts().reset_index()
    # Rename explicitly: the column names produced by reset_index() after
    # value_counts() differ between pandas versions.
    breed_counts.columns = ['Breed', 'Count']

    # Counts are whole numbers, so label bars without decimals ('.0f')
    # instead of the two-decimal format used for the mean-duration chart.
    fig_bar_breedcount = px.bar(breed_counts, x='Breed', y='Count',
                                text_auto='.0f', template='plotly_white', labels={'Breed': ''},
                                color='Breed', color_discrete_map=breed_colors)

    fig_bar_breedcount.update_yaxes(visible=False)
    fig_bar_breedcount.update_layout(showlegend=False)

    return fig_bar_breedcount

def create_breed_duration_plot(filtered_df):
    """Build a bar chart of mean stay duration per breed, annotated with medians.

    Args:
        filtered_df: DataFrame with ``Animal_Breed`` and ``Duration`` columns.

    Returns:
        A Plotly bar figure with bars sorted by mean duration (descending),
        coloured through ``breed_colors``, the mean printed on each bar, the
        y-axis hidden, no legend, and a "Median: ..." annotation per bar.
    """
    duration_stats = filtered_df.groupby('Animal_Breed')['Duration'].agg(['mean', 'median'])
    duration_stats = duration_stats.reset_index().sort_values('mean', ascending=False)

    fig = px.bar(
        duration_stats,
        x='Animal_Breed',
        y='mean',
        text_auto='.2f',
        template='plotly_white',
        labels={'Animal_Breed': ''},
        color='Animal_Breed',
        color_discrete_map=breed_colors,
    )
    fig.update_yaxes(visible=False)
    fig.update_layout(showlegend=False)

    # One annotation per bar, anchored at the bar's top and nudged upwards.
    per_breed = zip(duration_stats['Animal_Breed'], duration_stats['mean'], duration_stats['median'])
    for breed, mean_days, median_days in per_breed:
        fig.add_annotation(
            x=breed,
            y=mean_days,
            text=f"Median: {median_days:.2f}",
            showarrow=False,
            yshift=10,
        )
    return fig

# Shared inline styles
def _gradient_rule_style(height):
    """Return the inline style of a borderless, left-to-right gradient rule."""
    return {'border': 'none', 'height': height, 'background': 'linear-gradient(to right, #007bff, #ff7b00)', 'margin': '10px 0'}


style_space = _gradient_rule_style('5px')
_CENTERED = {'text-align': 'center'}
_INTRO_STYLE = {'text-align': 'center', 'margin-top': '20px', 'font-style': 'italic', 'font-size': '24px', 'color': 'black'}

# Dash App
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.LUX])

app.title = ' Dallas Animal Shelter'

# Multi-select breed picker; the first three breeds are preselected.
_breed_dropdown = dcc.Dropdown(
    id='breed-dropdown',
    options=[{'label': breed, 'value': breed} for breed in top_breeds],
    value=top_breeds[:3],
    multi=True,
)

_intro_section = html.Div([
    html.P("Analyzing only dogs, which make up 80% of the shelter animals, we selected the top 20 breeds, representing 90% of this group", style=_INTRO_STYLE),
    html.Hr(style=style_space),
])

_selector_row = dbc.Row([
    dbc.Col(dbc.Card(_breed_dropdown), width=12, class_name="btn-group dash-dropdown"),
])

# Full-width survival chart.
_survival_row = dbc.Row([
    html.H5("Exit Probability by Days: A Closer Look at Each Breed", style=_CENTERED),
    dbc.Col(dcc.Graph(id='survival-plot'), width=12),
])

# Two half-width charts side by side: counts and durations.
_count_column = dbc.Col(
    [
        html.H5("Top Dog Breeds at Dallas Shelter: A Count Analysis", style=_CENTERED),
        dcc.Graph(id='breed-count-plot'),
    ],
    width=6,
)
_duration_column = dbc.Col(
    [
        html.H5("Shelter Stays: Average and Median Duration Dog Breeds", style=_CENTERED),
        dcc.Graph(id='breed-duration-plot'),
    ],
    width=6,
)
_breakdown_row = dbc.Row([_count_column, _duration_column])

app.layout = dbc.Container(
    [
        html.Hr(style=style_space),
        html.H2("Dog Diaries: Analyzing Breeds' Fate in Dallas Animal Shelter", style=_CENTERED),
        html.Hr(style=style_space),
        _intro_section,
        _selector_row,
        html.Hr(style=_gradient_rule_style('2px')),
        _survival_row,
        _breakdown_row,
    ],
    fluid=True,
)

@app.callback(
    [
        Output('survival-plot', 'figure'),
        Output('breed-count-plot', 'figure'),
        Output('breed-duration-plot', 'figure'),
    ],
    Input('breed-dropdown', 'value'),
)
def update_plot(selected_breeds):
    """Rebuild the three figures whenever the breed selection changes.

    Args:
        selected_breeds: Current value of the ``breed-dropdown`` component.

    Returns:
        A 3-tuple ``(survival figure, breed-count figure, duration figure)``.
        When nothing is selected, the survival figure is the "no data"
        placeholder and the two bar charts are empty.
    """
    if not selected_breeds:
        return create_survival_plot(pd.DataFrame()), px.bar(), px.bar()

    in_selection = df_top_breeds['Animal_Breed'].isin(selected_breeds)
    filtered_df = df_top_breeds[in_selection]

    survival_fig = create_survival_plot(calculate_survival_curves(selected_breeds))
    count_fig = create_breed_count_plot(filtered_df)
    duration_fig = create_breed_duration_plot(filtered_df)
    return survival_fig, count_fig, duration_fig