Py.Cafe

nataliatsyporkin/

WordCloud_Figure_Friday_week_43

Interactive Word and Bigram Analysis Dashboard - Plotly & Dash

DocsPricing
  • app.py
  • helper.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Figure Friday 2024 - week 43
# https://community.plotly.com/t/figure-friday-2024-week-43/88243

import dash
from dash import Dash, dcc, html, Input, Output, callback
import dash_bootstrap_components as dbc

from helper import *


# Download the file from github
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2024/week-43/OpenRepair_Data_RepairCafeInt_202407.csv",
    low_memory=False,
)
# Rows without a problem description have no text to analyse.
# A list is passed for `subset` so this also works on pandas versions that
# do not accept a bare column label here.
df.dropna(subset=['problem'], inplace=True)

# Run the n-gram extraction ONCE per row and reuse the result for both
# derived columns (it was previously invoked twice for every description).
# Each result is indexed as [0] -> words and [1] -> bigrams.
_problem_ngrams = df['problem'].apply(extract_ngrams)
df['words'] = _problem_ngrams.apply(lambda parts: ' '.join(parts[0]))  # -> string
df['bigrams'] = _problem_ngrams.apply(lambda parts: parts[1])          # -> list[tuple]

# Corpus-wide text, vocabulary size and figures; these are what the
# dashboard shows for the 'All' category.
all_words = ' '.join(df['words'])
unique_words = len(set(all_words.split()))
wc_chart = create_word_cloud(all_words)

bar_top_20 = create_bar_top_20_words_and_bigrams(df[['words', 'bigrams']])


# Define dropdown for categories---------------------------------------------------
# Options are 'All' plus every distinct product category, sorted alphabetically.
category_dropdown = dcc.Dropdown(
    id="dropdown",
    # dropna(): a missing category (NaN) is a float and cannot be ordered
    # against the string labels, which would make sorted() raise TypeError.
    options=sorted(['All'] + df['product_category'].dropna().unique().tolist()),
    value='Vacuum',
    clearable=False,
    searchable=True,
    # Dash style dictionaries use camelCased CSS property names.
    style={'backgroundColor': '#F6F8FA', 'marginTop': '20px', 'marginBottom': '20px'},
)
    

# Create app---------------------------------------------------------------
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])


# Define the app layout----------------------------------------------------
# Two rows: a header (category selector + unique-word counter) and a body
# (word cloud on the left, top-20 bar chart on the right).
# Style dictionaries use camelCased CSS property names, as Dash expects.
app.layout = dbc.Container(
    [
        dbc.Row(
            [
                dbc.Col(
                    html.H4('Select Category', style={'color': '#1F77B4'}),
                    width=2,
                    class_name='text-center',
                ),
                dbc.Col(category_dropdown, width=4),
                dbc.Col(
                    html.H4(
                        [
                            'Number of Unique Words = ',
                            # Rewritten by the callback whenever the category changes.
                            html.Span(id='unique-words', children=f'{unique_words:,.0f}'),
                        ],
                        style={'color': '#1F77B4'},
                    ),
                    width=6,
                    class_name='text-center',
                ),
            ],
            style={
                'alignItems': 'center',
                'border': 'solid #2980B9 1px',
                'backgroundColor': 'powderblue',
            },
        ),

        dbc.Row(
            [
                dbc.Col(
                    dcc.Graph(id='word-cloud', figure=wc_chart, config={'staticPlot': True}),
                    width=6,
                    style={
                        'border': 'solid lightgrey',
                        'paddingTop': '15px',
                        'paddingBottom': '15px',
                        'borderWidth': '1px',
                        'backgroundColor': '#F6F8FA',
                        'justifyItems': 'center',
                    },
                ),
                dbc.Col(
                    dcc.Graph(id='bar-chart', figure=bar_top_20, config={'displayModeBar': False}),
                    width=6,
                    style={
                        'border': 'solid lightgrey',
                        'borderWidth': '1px',
                        'backgroundColor': '#F6F8FA',
                        'justifyItems': 'center',
                    },
                ),
            ]
        ),
    ],
    fluid=False,  # don't expand to full width
)


# Define the callback and update function
@app.callback(
    Output('word-cloud', 'figure'),
    Output('bar-chart', 'figure'),
    Output('unique-words', 'children'),
    Input('dropdown', 'value'),
)
def update_word_cloud(category):
    """Rebuild the dashboard content for the category picked in the dropdown.

    Args:
        category: Current value of the 'dropdown' component.

    Returns:
        A 3-tuple matching the declared outputs: the word-cloud figure, the
        top-20 bar-chart figure, and the number of unique words formatted
        with thousands separators.
    """
    # 'All' reuses the figures and count precomputed at module import.
    if category == 'All':
        return wc_chart, bar_top_20, f'{unique_words:,.0f}'

    # Keep only the rows of the chosen category and the two text columns.
    subset = df.loc[df['product_category'] == category, ['words', 'bigrams']]
    text = ' '.join(subset['words'])
    unique_count = len(set(text.split()))
    bars = create_bar_top_20_words_and_bigrams(subset)

    # Only the 'Vacuum' category is drawn with a mask. `vacuum_mask` is not
    # defined in this file -- presumably it comes from `helper` via the star
    # import; confirm there.
    cloud_kwargs = {'mask': vacuum_mask} if category == 'Vacuum' else {}
    cloud = create_word_cloud(text, **cloud_kwargs)

    return cloud, bars, f'{unique_count:,.0f}'


if __name__ == "__main__":
    # Dash.run() is the supported entry point; run_server() is deprecated in
    # Dash 2.x and no longer available in Dash 3.
    app.run(debug=False)  # debug=True --> auto-reload