Py.Cafe

caiquecober/

bank-of-canada-open-data-explorer

Bank of Canada Open Data Explorer

Docs Pricing
  • app.py
  • custom.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

import pandas as pd
import streamlit as st
import plotly.express as px
import requests
from io import BytesIO
from pyvalet import ValetInterpreter

def load_time_series(chosen_series, group_series, api):
    r'''
    Retrieve the observations of the chosen time series, aligned by date.

    Each chosen label is mapped back to its series name through
    ``group_series``, its observations are fetched through ``api``, and the
    first data column of every result is joined column-wise on the date index.
    The combined table is also rendered on the page with ``st.write``.

    Parameters:
    -----------
    chosen_series : list
        List of the time series labels selected from the chosen data group.
    group_series : pandas.DataFrame
        Tabular data with (at least) ``name`` and ``label`` columns describing
        the time series of the group.
    api : pyvalet.ValetInterpreter
        Bank of Canada open data API client instance.
        NOTE(review): the result of ``get_series_observations`` is assumed to
        be a single DataFrame with a ``date`` column - confirm against the
        installed pyvalet version.

    Returns:
    --------
    pandas.DataFrame
        One column per selected series, indexed by a sorted datetime index
        named ``Date``. Empty when ``chosen_series`` is empty.

    Raises:
    -------
    IndexError
        If a chosen label has no matching row in ``group_series``.
    '''

    # Collect the individual series first and join them once at the end;
    # concatenating inside the loop copies the growing table on every pass
    frames = []

    # Loop through each selected time series
    for series in chosen_series:
        # Translate the human-readable label back to the series name
        selection = group_series[group_series.label == series].name.values[0]

        # Fetch observations for the selected series
        df = api.get_series_observations(selection, response_format='csv')

        # Parse the dates; unparseable values are coerced to NaT
        df['date'] = pd.to_datetime(df['date'], errors='coerce')

        # Drop NaT rows before indexing: duplicate NaT labels cannot be
        # aligned across series and would corrupt the date range bounds
        df = df.dropna(subset=['date']).set_index('date')

        # Keep only the first data column of the response
        frames.append(df.iloc[:, :1])

    if frames:
        # Sort so that index[0] / index[-1] are the true range bounds and
        # label-based date slicing works on a monotonic index
        data = pd.concat(frames, axis=1).sort_index()
    else:
        data = pd.DataFrame()

    # Set the final index as datetime with proper formatting
    data.index.name = 'Date'
    data.index = pd.to_datetime(data.index)
    st.write(data)
    return data

# -------------------------------------------------------------------------------
# Page setup: configuration, sidebar logo, and main title
st.set_page_config( # Configure page title, icon, layout, and sidebar state
    page_title='Bank of Canada Open Data Explorer',
    page_icon=':bank:',
    layout='wide',
    initial_sidebar_state='expanded')

# Retrieve and display the Bank of Canada's logo on the top of the sidebar.
# The logo is purely decorative, so a slow or failing download must neither
# hang the app (hence the timeout) nor crash it (hence the except branch).
try:
    logo_response = requests.get(
        'https://logos-download.com/wp-content/uploads/'
        '2016/03/Bank_of_Canada_logo.png',
        timeout=10)
    logo_response.raise_for_status()
except requests.RequestException:
    boc_logo = None  # Logo unavailable: carry on without it
else:
    boc_logo = BytesIO(logo_response.content)
    st.sidebar.image(boc_logo, use_column_width=True)

# Define the web application title
st.title(':bank: Bank of Canada :maple_leaf: [Open Data]'
    '(https://github.com/tylercroberts/pyvalet) Explorer :mag:')
# -------------------------------------------------------------------------------

# -------------------------------------------------------------------------------
# Data group selection: list the groups, let the user pick one in the sidebar,
# then fetch the series that belong to the picked group
api_client = ValetInterpreter()
groups = api_client.list_groups()

# Every group label except the ones literally labelled 'delete'
group_options = groups['label'][groups['label'] != 'delete']

# The first entry is a placeholder meaning "nothing selected yet"
chosen_group = st.sidebar.selectbox(
    label='Pick a group from which to select a time series:',
    options=['Click here to select...', *group_options.tolist()],
    key='chosen_group'
)
if chosen_group == 'Click here to select...':
    st.warning('A Bank of Canada data group must be selected to proceed')
    st.stop()

# Map the chosen label back to the group name expected by the API client
chosen_group_name = groups[groups['label'] == chosen_group]['name'].values[0]
group_details, group_series = api_client.get_group_detail(
    chosen_group_name,
    response_format='csv'
)

# Labels of the time series available in the selected data group
series_options = group_series.set_index(group_series['name'])['label'].tolist()
# -------------------------------------------------------------------------------
# Section to select the time series of the chosen group and, optionally, to
# switch the date range controls to free-text entry
with st.expander(label='Select time series from data group',
                      expanded=True):
    chosen_series = st.multiselect(
        label=f'Time series within the "{chosen_group}" data group:',
        options=series_options,
        key='chosen_series')
    if not chosen_series:
        st.warning('At least one time series must be selected to proceed')
        st.stop()
    df = load_time_series(chosen_series, group_series, api_client)
    # The code below (and further down the script) reads df.index[0] and
    # df.index[-1], which would raise IndexError on an empty table
    if df.empty:
        st.warning('The selected time series contain no observations')
        st.stop()
    st.write('Please note that Streamlit limits date selection to last 10 years')
    # st.button only returns True during the single rerun triggered by the
    # click. Remember the choice in session state so the manual-entry inputs
    # are still there once the user starts typing (which triggers a rerun).
    if st.button('So you can click here to enable manual entry of any date'):
        st.session_state['manual_date_entry'] = True
    if st.session_state.get('manual_date_entry', False):
        overwrite_start_date = st.sidebar.text_input(
            label='Enter start date manually:',
            value=df.index[0].strftime('%Y/%m/%d'),
            max_chars=10,
            key='overwrite_start_date'
        )
        overwrite_end_date = st.sidebar.text_input(
            label='Enter end date manually:',
            value=df.index[-1].strftime('%Y/%m/%d'),
            max_chars=10,
            key='overwrite_end_date'
        )
    else:
        # None tells the date range section to use the date_input widgets
        overwrite_start_date, overwrite_end_date = None, None

# Define, parse, and validate the start_date and end_date variables, taken
# either from the date_input widgets or from the manual text overrides
invalid_date_range = False
min_streamlit_date = pd.Timestamp.today() - pd.DateOffset(years=10)
first_date, last_date = df.index[0], df.index[-1]

if overwrite_start_date is None:
    start_date = st.sidebar.date_input('Select start date:',
        first_date if first_date > min_streamlit_date else min_streamlit_date)
else:
    start_date = overwrite_start_date
# date_input yields a datetime.date and text_input a str; normalise both to a
# Timestamp so the comparisons with the DatetimeIndex bounds are well defined.
# Unparseable manual input becomes NaT and is rejected just below.
start_date = pd.to_datetime(start_date, errors='coerce')

if pd.isna(start_date) or start_date < first_date or start_date > last_date:
    st.sidebar.error('Error: Start date must be within the time series range of '
        f'{first_date.date()} to {last_date.date()}')
    invalid_date_range = True

# Check the end-date override here (the start-date one was tested before)
if overwrite_end_date is None:
    end_date = st.sidebar.date_input('Select end date:', last_date)
else:
    end_date = overwrite_end_date
end_date = pd.to_datetime(end_date, errors='coerce')

if pd.isna(end_date) or end_date < first_date or end_date > last_date:
    st.sidebar.error('Error: End date must be within the time series range of '
        f'{first_date.date()} to {last_date.date()}')
    invalid_date_range = True

# Both dates are individually valid here; a reversed range would silently
# produce an empty selection further down
if not invalid_date_range and start_date > end_date:
    st.sidebar.error('Error: End date must fall after start date')
    invalid_date_range = True

if invalid_date_range:
    st.warning('Valid start and end dates must be chosen to proceed')
    st.stop()
# -------------------------------------------------------------------------------
# Enable downloading of the selected time series data as CSV files.
# NOTE(review): the earlier version called a `download_link` helper that is
# neither defined nor imported in this file, so either button raised NameError
# when clicked. Streamlit's own download button needs no helper.
csv_file_name = f'{chosen_group.replace(" ", "_")}.csv'

# Only the rows within the selected start/end dates
st.sidebar.download_button(
    label='Download Filtered Selection as CSV',
    data=df[start_date:end_date].to_csv().encode('utf-8'),
    file_name=csv_file_name,
    mime='text/csv',
    key='download_filtered_csv'
)

# The entire selection, regardless of the chosen date range
st.sidebar.download_button(
    label='Download Entire Selection as CSV',
    data=df.to_csv().encode('utf-8'),
    file_name=csv_file_name,
    mime='text/csv',
    key='download_entire_csv'
)

# Collapsible section holding an interactive line chart of the selected series,
# restricted to the chosen start/end dates.
# A spline-smoothing checkbox and an x-axis range slider were sketched here
# previously but are not enabled.
with st.expander(label='Plot selected time series data'):
    plotted_window = df[start_date:end_date]
    fig = px.line(plotted_window)
    st.plotly_chart(fig, use_container_width=True)