PyCafe - Dash - Programming Language Popularity, 2004 to 2024

Popularity of Programming Languages from 2004 to 2024.csv
app.py
requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import polars as pl
import polars.selectors as cs
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import dash_ag_grid as dag
'''
    This dashboard shows popularity of programming languages from 2004 to 2024.
    All data are percentages, where the totals across each row should add up to 
    100%. This will be checked, exceptions will be noted. 
'''

#----- DEFINE CONSTANTS---------------------------------------------------------
citation = (
    'Thank you to Muhammad Khalid and Kaggle for the data, ' +
     'https://www.kaggle.com/datasets/muhammadkhalid/'   
)

style_space = {
    'border': 'none', 
    'height': '5px', 
    'background': 'linear-gradient(to right, #007bff, #ff7b00)', 
    'margin': '10px 0'
    }

#----- DEFINE FUNCTIONS---------------------------------------------------------
def create_line_plot(lang_1, lang_2):
    fig = px.line(
        df,
        'DATE',
        [lang_1, lang_2], 
        title=(f'{lang_1} vs. {lang_2}<br><sup>Popularity by Year</sup>'),
        template='simple_white',
    )
    fig.update_layout(
        yaxis_title='POPULARITY',
        xaxis_title='',   # that X is year is obvious, no label
        hovermode='x unified',
        legend_title_text='Prog. Lang.'
    )  
    return fig

def create_scatter_plot(lang_1, lang_2):
    fig = px.scatter(
        df,
        lang_1,
        lang_2,
        template='simple_white',
        title=(f'{lang_1} vs. {lang_2}<br><sup>Scatter Plot/Correlation</sup>'),
    )
    return fig

def get_table_data(lang_1, lang_2):
    df_table = (
        df
        .select(pl.col('DATE', lang_1, lang_2))
        .sort('DATE', descending=False)
    )
    return df_table

#----- LOAD AND CLEAN DATA -----------------------------------------------------
df = (
    pl.read_csv(
        'Popularity of Programming Languages from 2004 to 2024.csv',
        ignore_errors=True
        )
    .with_columns(DATE = pl.col('Date').str.to_date(format='%b-%y'))
    .select('DATE', pl.all().exclude('Date', 'DATE')) 
    
    # move DATE to 1st col, get rid of Date col (string)
    .with_columns(pl.all().fill_null(strategy="zero"))
    
    # use sum_horizontal to see how close row totals are to 100%
    .with_columns(
        PCT_TOT = (pl.sum_horizontal(cs.numeric()))
    )
)
print(f'{df['PCT_TOT'].min() = }')
print(f'{df['PCT_TOT'].max() = }')
print('hi')
print(df)

drop_down_list = sorted(list(df.columns)[1:-1])
print(f'{drop_down_list = }')

grid = dag.AgGrid(
    rowData=[],
    columnDefs=[{"field": i, 'filter': True, 'sortable': True} for i in df.columns],
    dashGridOptions={"pagination": True},
    id='data_table'
)

#----- DASH APP ----------------------------------------------------------------
app = Dash(external_stylesheets=[dbc.themes.SANDSTONE])

app.layout = dbc.Container([
    html.Hr(style=style_space),
    html.H2(
        'Programming Language Popularity, 2004 to 2024', 
        style={
            'text-align': 'left'
        }
    ),
    html.Hr(style=style_space),
    html.Div([
        html.P(
            f'Citation: {citation}',
            style={
                'text-align': 'left',
                'margin-top': '20px',
                'font-style':
                'italic','font-size':
                '16px',
                'color': 'gray'
                }
            ),
        html.Hr(style=style_space)
    ]),

    dbc.Row(
        [dcc.Dropdown(       
            drop_down_list,       # list of choices
            ['Python', 'Perl'],   # defaults
            id='lang_dropdown', 
            style={'width': '50%'},
            multi=True,  # first 2 are used, all other ignored
            ),
        ],
    ),

    html.Div(id='dd-output-container'),

    dbc.Row([
        dbc.Col([grid]),
    ]),

    dbc.Row([
        dbc.Col(dcc.Graph(id='scatter_plot')),    
        dbc.Col(dcc.Graph(id='line_plot'),),
    ])
])

@app.callback(
        Output('line_plot', 'figure'),
        Output('scatter_plot', 'figure'),
        Output('data_table', 'rowData'),
        Input('lang_dropdown', 'value'),
)

def update_dashboard(selected_values):
    df_selected = df.select(pl.col('DATE', selected_values[0], selected_values[1]))
    line_plot = create_line_plot(selected_values[0], selected_values[1])
    scatter_plot  = create_scatter_plot(selected_values[0], selected_values[1])
    return line_plot, scatter_plot, df_selected.to_dicts()

#----- RUN THE APP -------------------------------------------------------------
if __name__ == "__main__":
    app.run(debug=True)
Py.Cafe

programming-language-popularity

Programming Language Popularity, 2004 to 2024