# Import section
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import datetime
import re
import pydeck as pdk
# Use the wide page layout.
st.set_page_config(layout="wide")

# Main page title.
st.title("Streamlit app")

# Sidebar: app title, index header and the section selector whose value
# decides which section of the script is rendered.
st.sidebar.title("Streamlit Demo")
st.sidebar.header('Índice')
section_options = [
    "1. Cargar los datos",
    "2. Crear gráficos",
    "3. Crear mapas",
]
radio = st.sidebar.radio(label="", options=section_options)
# Functions to include
def read_data(rev_path, places_path):
    """Load the reviews and listings CSV files as DataFrames.

    Args:
        rev_path: Source of the reviews CSV, passed straight to
            ``pd.read_csv``. The file must contain a ``date`` column.
        places_path: Source of the listings CSV, passed straight to
            ``pd.read_csv``.

    Returns:
        A ``(reviews, places)`` tuple. ``reviews['date']`` holds
        ``datetime.date`` values, and the ``id`` column of ``places`` is
        renamed to ``listing_id``.
    """
    reviews = pd.read_csv(rev_path)
    parsed_dates = pd.to_datetime(reviews['date'])
    # Keep only the calendar date, dropping the time component.
    reviews['date'] = parsed_dates.dt.date

    places = pd.read_csv(places_path).rename(columns={"id": "listing_id"})
    return reviews, places
def enrich_dataset(df1, df2):
    """Merge two frames on ``listing_id`` and derive date parts and a numeric price.

    Args:
        df1: Left DataFrame; must contain ``listing_id``.
        df2: Right DataFrame; must contain ``listing_id``.
            The merged result must expose ``date`` and ``price`` columns.

    Returns:
        The outer-merged DataFrame (with the ``_merge`` indicator column),
        restricted to rows whose ``date`` is not null, with ``year``,
        ``month`` and ``day`` columns added and ``price`` converted to
        float. Prices that are missing or contain no parsable number
        become NaN.
    """
    # Outer join keeps unmatched rows from both sides; ``_merge`` records
    # which side each row came from.
    enriched_df = pd.merge(left=df1, right=df2, on=['listing_id'], how='outer', indicator=True)
    # Clean null dates
    enriched_df = clean_null_dates(enriched_df)
    # Split time columns into year, month and day
    enriched_df = split_time_columns(enriched_df)
    # Format price: strip everything except digits and the decimal point.
    # Casting to str first means NaN (from unmatched rows) or already-numeric
    # prices are handled instead of raising inside a per-row regex call.
    digits_only = enriched_df['price'].astype(str).str.replace(r"[^\d.]", "", regex=True)
    enriched_df['price'] = pd.to_numeric(digits_only, errors='coerce')
    return enriched_df
def clean_null_dates(df):
    """Return the rows of ``df`` whose ``date`` value is not null.

    Args:
        df: DataFrame with a ``date`` column.

    Returns:
        A new DataFrame containing only the rows with a non-null ``date``.
        It is an independent copy, so callers can add or overwrite columns
        on it without touching ``df`` and without pandas'
        ``SettingWithCopyWarning``.
    """
    return df[df['date'].notna()].copy()
def split_time_columns(df):
    """Add ``year``, ``month`` and ``day`` columns derived from ``date``.

    Args:
        df: DataFrame whose ``date`` column holds objects exposing
            ``year``, ``month`` and ``day`` attributes.

    Returns:
        The same ``df`` object, modified in place with the three new
        columns.
    """
    dates = df['date']
    for part in ('year', 'month', 'day'):
        # Bind ``part`` as a default so each lambda reads its own attribute.
        df[part] = dates.apply(lambda value, part=part: getattr(value, part))
    return df
def get_min_max_date(df):
    """Return the smallest and largest value of the ``date`` column.

    Args:
        df: DataFrame with a ``date`` column.

    Returns:
        A ``(min_date, max_date)`` tuple.
    """
    dates = df['date']
    return dates.min(), dates.max()
def customize_interval(min_date, max_date):
    """Ask for a start and end date in the sidebar and report their validity.

    Args:
        min_date: Initial value of the start-date input.
        max_date: Initial value of the end-date input.

    Returns:
        A ``(start_date, end_date)`` tuple with the values chosen in the
        sidebar. The tuple is returned even when the interval is invalid;
        in that case an error message is shown in the sidebar.
    """
    start_date = st.sidebar.date_input('Fecha de inicio ', min_date)
    end_date = st.sidebar.date_input('Fecha de fin ', max_date)
    # A single-day interval (start == end) is valid: the error text only
    # forbids an end date *before* the start, and the date filter in this
    # file uses inclusive bounds.
    if start_date <= end_date:
        st.sidebar.success('Fecha de inicio: `%s`\n\nFecha de fin: `%s`' % (start_date, end_date))
    else:
        st.sidebar.error('Error: La fecha final no puede ser anterior a la de inicio')
    return start_date, end_date
# Load both CSV files on every script run; all sections below read from them.
reviews_df, places_df = read_data(
    rev_path="reviews_sample.csv",
    places_path="listings.csv",
)
# Specific per section
if radio == '1. Cargar los datos':
    # Section 1: preview both raw datasets, with optional column filters
    # and a per-listing detail view driven from the sidebar.
    st.subheader("Cargar los datos")

    # Reviews preview (first 500 rows).
    st.write("Dataset de reviews")
    st.write(reviews_df.head(500))
    if st.checkbox('Filtrar columnas de reviews', value=False):
        # Filter columns in sidebar
        st.sidebar.subheader("Filtrado de columnas")
        rev_cols_to_show = st.sidebar.multiselect('Dataset de reviews:', reviews_df.columns)
        st.write("Dataset de reviews filtrado")
        st.write(reviews_df[rev_cols_to_show].head(500))

    # Listings preview (first 500 rows).
    st.write("Cargamos el dataset de alojamientos")
    st.write(places_df.head(500))
    if st.checkbox('Filtrar columnas de alojamientos', value=False):
        # Filter columns in sidebar
        st.sidebar.subheader("Filtrado de columnas")
        places_cols_to_show = st.sidebar.multiselect('Dataset de alojamientos:', places_df.columns)
        # This table shows listings, so the caption names listings
        # (it previously repeated the reviews caption).
        st.write("Dataset de alojamientos filtrado")
        st.write(places_df[places_cols_to_show].head(500))

    if st.checkbox('Mostrar información de los alojamientos', value=False):
        st.sidebar.subheader("Filtrado de alojamientos por id")
        # Sorted unique ids give a stable option order in the widget;
        # iterating a set does not guarantee any order.
        listing_options = sorted(places_df['listing_id'].dropna().unique())
        listings_ids = st.sidebar.multiselect('Id de los alojamientos:', listing_options)
        for listing_id in listings_ids:
            st.markdown("- - -")
            place = places_df[places_df['listing_id'] == listing_id]
            st.markdown("### **__Nombre del alojamiento__**")
            st.write(place['name'].values[0])
            st.markdown("### **__Descripción del alojamiento__**")
            # NOTE(review): the description is rendered as raw HTML straight
            # from the CSV; confirm the data source is trusted.
            st.write(place['description'].values[0], unsafe_allow_html=True)
            st.markdown("### **__Foto del alojamiento__**")
            picture_url = place['picture_url'].values[0]
            st.image(picture_url)
        # NOTE(review): original indentation was lost; this closing rule is
        # assumed to follow the loop rather than repeat per listing - confirm.
        st.markdown("- - -")
if radio == '2. Crear gráficos':
    # Section 2: charts built from the reviews enriched with listing data.
    st.subheader("Creación de gráficos")
    # Enrich the dataframe
    enriched_df = enrich_dataset(reviews_df, places_df)
    st.write("Resultado después de enriquecer los datos. Dataset de reviews con información de alojamientos")
    # Get the min and max review date
    min_date, max_date = get_min_max_date(enriched_df)
    if st.checkbox('Explorar los datos por fecha de review', value=False):
        st.sidebar.title("Filtrar por fecha de review")
        custom_min_date, custom_max_date = customize_interval(min_date, max_date)
        # Inclusive on both ends.
        in_range = (enriched_df['date'] >= custom_min_date) & (enriched_df['date'] <= custom_max_date)
        enriched_df = enriched_df[in_range]
        fig_dates = px.histogram(enriched_df, x="date", title="Distribución de reviews por día")
        st.plotly_chart(fig_dates)

    # NOTE(review): original indentation was lost; the evolution chart is
    # assumed to live outside the date-filter checkbox - confirm.
    st.markdown("#### Evolución de la relación calidad-precio por barrio y año-mes")
    reviews_neighbourhood_evol_df = (
        enriched_df
        .groupby(['year', 'month', 'neighbourhood_cleansed', 'neighbourhood_group_cleansed'])
        .agg({'price': 'mean', 'review_scores_value': 'mean', 'listing_id': 'count'})
        .reset_index()
    )
    if reviews_neighbourhood_evol_df.empty:
        # Happens e.g. when the chosen date interval matches no review;
        # there is nothing to tabulate or animate.
        st.warning("No hay datos para mostrar")
    else:
        # Build the "YYYY-MM" frame label column-wise instead of row by row.
        reviews_neighbourhood_evol_df['yearMonth'] = (
            reviews_neighbourhood_evol_df['year'].astype(str)
            + '-'
            + reviews_neighbourhood_evol_df['month'].astype(str).str.zfill(2)
        )
        reviews_neighbourhood_evol_df = reviews_neighbourhood_evol_df.rename(columns={"listing_id": "count"})
        reviews_neighbourhood_evol_df = reviews_neighbourhood_evol_df[
            ['yearMonth', 'neighbourhood_cleansed', 'neighbourhood_group_cleansed',
             'count', 'review_scores_value', 'price']
        ]
        st.write(reviews_neighbourhood_evol_df)
        fig_anim = px.scatter(
            reviews_neighbourhood_evol_df,
            x="review_scores_value",
            y="price",
            animation_frame="yearMonth",
            animation_group="neighbourhood_cleansed",
            size='count',
            color="neighbourhood_group_cleansed",
            hover_name="neighbourhood_cleansed",
        )
        st.plotly_chart(fig_anim)
if radio == '3. Crear mapas':
    # Section 3: an overview map plus an optional zone/year density map.
    st.subheader("Creación de mapas")
    # Enrich the dataframe
    enriched_df = enrich_dataset(reviews_df, places_df)
    st.write("Resultado después de enriquecer los datos. Dataset de reviews con información de alojamientos")
    # Plot a map
    st.write("Mapa de todos los alojamientos")
    # The enriched frame has one row per review and, because of the outer
    # merge, may hold rows without coordinates. Pass only distinct,
    # non-null coordinate pairs to the map.
    map_points = enriched_df[['latitude', 'longitude']].dropna().drop_duplicates()
    st.map(map_points)
    # Filter by zone
    if st.checkbox('Filtrar por zona y periodo de tiempo en Valencia', value=False):
        st.sidebar.subheader("Filtrado de las zonas")
        # Sorted, NaN-free options keep the widget order (and therefore the
        # default selection) stable between runs.
        zone_options = sorted(enriched_df['neighbourhood_group_cleansed'].dropna().unique())
        year_options = sorted(enriched_df['year'].dropna().unique())
        zone = st.sidebar.selectbox('Selecciona la zona:', zone_options)
        year = st.sidebar.selectbox('Selecciona el año:', year_options)
        filtered_df = enriched_df[
            (enriched_df['neighbourhood_group_cleansed'] == zone)
            & (enriched_df['year'] == year)
        ]
        if filtered_df.empty:
            # No rows means the midpoint would be NaN and the view state
            # invalid, so skip the chart.
            st.warning("No hay reviews para la zona y el año seleccionados")
        else:
            # Center the view on the mean coordinate of the selection.
            zone_midpoint = (filtered_df["latitude"].mean(), filtered_df["longitude"].mean())
            coords = filtered_df[['longitude', 'latitude']]
            st.pydeck_chart(pdk.Deck(
                map_style='mapbox://styles/mapbox/light-v9',
                initial_view_state=pdk.ViewState(
                    latitude=zone_midpoint[0],
                    longitude=zone_midpoint[1],
                    zoom=11.5,
                    pitch=50,
                ),
                layers=[
                    pdk.Layer(
                        'HexagonLayer',
                        data=coords,
                        get_position='[longitude, latitude]',
                        radius=200,
                        elevation_scale=4,
                        elevation_range=[0, 1000],
                        pickable=True,
                        extruded=True,
                    ),
                    pdk.Layer(
                        'ScatterplotLayer',
                        data=coords,
                        get_position='[longitude, latitude]',
                        get_color='[200, 30, 0, 160]',
                        get_radius=200,
                    ),
                ],
            ))
        # Nested under the zone filter because it reads ``filtered_df``.
        if st.checkbox('Mostrar los datos', value=False):
            grouped_filtered_df = (
                filtered_df
                .groupby(['neighbourhood_group_cleansed', 'neighbourhood_cleansed'])
                .agg(count_reviews=('listing_id', 'count'))
            )
            st.write(grouped_filtered_df)