Py.Cafe

marie-anne/

2025-w5-figurefriday

Alternative for clickable wordcloud + regex filtering based on selected tags

DocsPricing
  • assets/
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 31 21:53:20 2025

@author: win11
AG grid image : https://community.plotly.com/t/dash-ag-grid-input-property-when-using-cellrendererselector/87921
from @paeger
"""

import dash
from dash import dcc, html, Input, Output
import pandas as pd
import dash_bootstrap_components as dbc
from random import randint
from sklearn.utils import shuffle

# Load the Figure Friday 2025 week-5 dataset (Steam top-100 played games).
df = pd.read_csv("https://raw.githubusercontent.com/plotly/Figure-Friday/refs/heads/main/2025/week-5/Steam%20Top%20100%20Played%20Games%20-%20List.csv")

# Numeric price column (cleaning approach borrowed from community member
# Avacsiglo21): "Free To Play" becomes 0.0 and the pound sign is stripped.
df["Filter Price"] = (
    df["Price"]
    .replace("Free To Play", 0.0)
    .astype(str)
    .str.replace("£", "", regex=False)
    .astype(float)
)

# The player counts arrive as strings with thousands separators ("12,345").
for _count_column in ("Current Players", "Peak Today"):
    df[_count_column] = df[_count_column].str.replace(",", "").astype(int)

# Flag shown on each card: does the tag string mention 'Multiplayer'?
# (Only displayed, not used for filtering.)
df["Multiplayer"] = df["Genre Tags"].str.contains("Multiplayer")



# Count how often every genre tag occurs over all games: split the
# comma-separated tag strings, stack the pieces into one long Series and tally.
# The result has one row per tag with the columns 'tag' and 'count'; naming
# both columns explicitly keeps this independent of the default column names
# that value_counts()/reset_index() produce in a given pandas version.
df_tags_raw = (
    df['Genre Tags']
    .str.split(r',\s+', expand=True)  # raw string: '\s' is a regex class, not a str escape
    .stack()
    .value_counts()
    .rename_axis('tag')
    .reset_index(name='count')
)

# Tags kept out of the tag selection: '+' (a stray entry that shows up in the
# scraped tag lists) and the player-mode / pricing tags, which were meant to
# become separate filter options.
tags_remove = ['+', 'Multiplayer', 'Singleplayer', 'Free to Play', 'Massively Multiplayer']
df_tags = df_tags_raw[~df_tags_raw['tag'].isin(tags_remove)]



def set_fontcolor():
    """Return a random opaque CSS colour, e.g. ``"rgb(12,200,7)"``.

    Three independent channel values in the range 0-255 are drawn with
    ``random.randint``. The string uses ``rgb()`` rather than ``rgba()``
    because no alpha component is supplied; a three-argument ``rgba()`` is
    only accepted by browsers that treat it as an alias of ``rgb()``.

    Returns
    -------
    str
        Colour in ``rgb(R,G,B)`` notation.
    """
    red, green, blue = (randint(0, 255) for _ in range(3))
    return f"rgb({red},{green},{blue})"

def set_fontsize(c, divisor=17):
    """Translate a tag count into a CSS font size expressed in ``rem``.

    Parameters
    ----------
    c : int or float
        Number of games carrying the tag.
    divisor : int or float, optional
        Count that corresponds to a font size of exactly ``1rem``.
        Defaults to 17, the scaling the word cloud has always used.

    Returns
    -------
    str
        The size as ``"<c / divisor>rem"``, e.g. ``"2.0rem"`` for ``c=34``.
    """
    return f"{c / divisor}rem"



def wordcloud(all_tags):
    """Build the clickable "word cloud": a checklist of tag buttons.

    Every tag becomes one button-styled checkbox whose label gets a random
    colour (``set_fontcolor``) and a font size derived from the tag's count
    (``set_fontsize``).

    Parameters
    ----------
    all_tags : pandas.DataFrame
        One row per tag with a ``tag`` column (used as label and as the
        checklist value) and a ``count`` column (drives the font size).

    Returns
    -------
    dbc.Form
        Form wrapping a ``dbc.Checklist`` with the id ``"taglist"``.
    """
    # Shuffle so that big and small labels are mixed on screen instead of
    # appearing in the order of the input; the fixed seed makes the order
    # reproducible between runs.
    df_rand = shuffle(all_tags, random_state=0)

    # Single pass over the rows: remember the count of the first occurrence
    # of each tag (dicts keep insertion order, so the shuffled order survives).
    # This replaces a full DataFrame filter per tag.
    first_count = {}
    for tag, count in zip(df_rand['tag'], df_rand['count']):
        first_count.setdefault(tag, count)

    options = [
        {
            'label': html.Span(
                tag,
                style={'color': set_fontcolor(), 'fontSize': set_fontsize(count)},
            ),
            'value': tag,
        }
        for tag, count in first_count.items()
    ]

    tagbuttons = dbc.Form(
        dbc.Checklist(
            id="taglist",
            className="btn-group",
            inputClassName="btn-check",
            labelClassName="btn btn-outline-primary ",
            labelCheckedClassName="active",
            options=options,
            # The border colour was missing its closing parenthesis, which
            # made the value invalid CSS.
            label_checked_style={
                "color": "black",
                "backgroundColor": "rgba(255,255,255,0.1)",
                "borderColor": "rgba(5,5,5,0)",
            },
        )
    )

    return tagbuttons





def card_with_overlays(r):
    """Render one game as an image card with info badges laid over it.

    Parameters
    ----------
    r : mapping / pandas.Series
        A single game; the keys read are 'Multiplayer', 'Thumbnail URL',
        'Rank', 'Price', 'Name' and 'Current Players'.

    Returns
    -------
    dbc.Card
        The thumbnail at half opacity with an overlay showing rank, price,
        current player count and the player mode.
    """
    # Only an explicit False switches the badge to 'Singleplayer'.
    multiplayer = 'Singleplayer' if r['Multiplayer'] == False else 'Multiplayer'

    thumbnail = dbc.CardImg(
        src=r['Thumbnail URL'],
        top=True,
        style={"opacity": 0.5},
    )

    # Top line of the overlay: rank and price badges.
    rank_and_price = html.Div(
        [
            html.Span(r['Rank'], className='rondje'),
            html.Span(r['Price'], className='info-rondje'),
        ],
        className='miniflex',
    )

    # The title stays in the markup but is hidden from view.
    hidden_title = html.Div(
        html.H4(r['Name'], className="card-title", style={'display': 'none'}),
    )

    # Bottom line of the overlay: player count and player mode badges.
    players_and_mode = html.Div(
        [
            html.Span(f"Current players: {r['Current Players']}", className='info-rondje'),
            html.Span(multiplayer, className='info-rondje'),
        ],
        className='miniflex',
    )

    overlay = dbc.CardImgOverlay(
        dbc.CardBody([rank_and_price, hidden_title, players_and_mode]),
    )

    return dbc.Card([thumbnail, overlay], className='col-md-6')




def card_grid(filtered_df):
    """Turn every row of ``filtered_df`` into a card, keeping the row order.

    Parameters
    ----------
    filtered_df : pandas.DataFrame
        The games to display.

    Returns
    -------
    list
        One ``card_with_overlays`` result per row; empty for an empty frame.
    """
    row_count = len(filtered_df)
    return [card_with_overlays(filtered_df.iloc[position]) for position in range(row_count)]



# Dash app setup

app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.CYBORG, dbc.icons.FONT_AWESOME],
)

# Left half of the page: the tag cloud built from the 30 most frequent tags,
# followed by an (initially empty) paragraph placeholder.
_tag_column = dbc.Col(
    [
        html.Div(id='wordcloud', children=wordcloud(df_tags.head(30))),
        html.P(id="radioitems-checklist-output"),
    ],
    className='col-md-6',
)

# Right half of the page: container the callback fills with game cards.
_cards_column = dbc.Col(
    [html.Div(id="popularity_container", className='flexgrid')],
    className='col-md-6',
)

app.layout = dbc.Container(
    [dbc.Row([_tag_column, _cards_column], className='col-md-12')],
    fluid=True,
)



@app.callback(
    Output("popularity_container", "children"),
    Input("taglist", "value"),
    prevent_initial_call=True,
)
def on_form_change(checklist_value):
    """Rebuild the card grid with the games that carry every selected tag.

    Parameters
    ----------
    checklist_value : list[str] or None
        Tags currently ticked in the ``taglist`` checklist.

    Returns
    -------
    list
        Cards (from ``card_grid``) for all games whose 'Genre Tags' string
        contains each selected tag. With nothing selected every game is
        returned.
    """
    # A checklist that has never been touched can report None instead of [].
    selected_tags = checklist_value or []

    # AND-combine one literal substring test per tag. This selects the same
    # rows as the former look-ahead regex '^(?=.*a)(?=.*b)', but the tag text
    # is never interpreted as a pattern, so characters such as '+', '(' or '.'
    # in a tag name cannot break or change the match.
    matches = pd.Series(True, index=df.index)
    for tag in selected_tags:
        # na=False: a game without tags simply does not match.
        matches &= df['Genre Tags'].str.contains(tag, regex=False, na=False)

    return card_grid(df[matches])





if __name__ == '__main__':
    # Start the development server. `app.run` replaces the legacy
    # `app.run_server` alias, which is deprecated and no longer available
    # in Dash 3.
    app.run(debug=True)