import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html
import dash_ag_grid as dag
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
# Load the NEA grant dataset (path is relative to the working directory).
df = pd.read_csv('Post45_NEAData_Final.csv')

# Replace missing values column by column: the median birth year for the
# numeric column, the literal 'Unknown' for the text columns.
# NOTE(review): this fills 'state', while the rest of the script reads
# 'us_state' -- confirm which column name the CSV actually has.
missing_value_defaults = {
    'birth_year': df['birth_year'].median(),
    'state': 'Unknown',
    'hometown': 'Unknown',
}
df = df.fillna(value=missing_value_defaults)
# Analyze dataset: derived features.
# Age at the time of the grant = grant year minus birth year.
df['age of writer'] = df['nea_grant_year'].sub(df['birth_year'])

# Bucket the ages into four labelled groups. pd.cut uses right-closed
# intervals by default, so the buckets are (0, 30], (30, 50], (50, 70] and
# (70, 100]; ages outside (0, 100] end up as NaN.
age_bin_edges = [0, 30, 50, 70, 100]
age_bin_labels = ['<30', '30-50', '50-70', '70+']
df['age_group'] = pd.cut(
    df['age of writer'],
    bins=age_bin_edges,
    labels=age_bin_labels,
)
# Number of grant records per US state, most frequent first.
grants_by_state = (
    df['us_state']
    .value_counts()
    .rename_axis('us_state')
    .reset_index(name='grant_count')
)

# Number of writers per hometown, most frequent first.
writers_by_hometown = (
    df['hometown']
    .value_counts()
    .rename_axis('hometown')
    .reset_index(name='writer_count')
)
# Geocode each distinct hometown so it can be placed on a map.
# The RateLimiter wrapper spaces successive requests at least one second
# apart.
# NOTE(review): this issues one network request per distinct hometown every
# time the module is executed -- consider caching the results on disk.
geolocator = Nominatim(user_agent='nea_analysis')
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# 'Unknown' is the placeholder written into missing hometowns during
# cleaning, not a place name. Sending it to the geocoder wastes a request
# and could pin every writer without a recorded hometown to whatever place
# the service happens to match, so it is left without a location.
writers_by_hometown['location'] = writers_by_hometown['hometown'].apply(
    lambda town: None if town == 'Unknown' else geocode(town)
)

# Rows with no location (skipped or unresolved) get None coordinates.
writers_by_hometown['latitude'] = writers_by_hometown['location'].apply(
    lambda loc: loc.latitude if loc else None
)
writers_by_hometown['longitude'] = writers_by_hometown['location'].apply(
    lambda loc: loc.longitude if loc else None
)

# Keep only the rows that can actually be drawn on the map.
writers_by_hometown_map = writers_by_hometown.dropna(
    subset=['latitude', 'longitude']
)
# Per-writer count of non-empty entries across the listed columns.
# NOTE(review): 'post45_hathi_entry' is counted alongside the degree
# columns; it does not look like a university field -- confirm it belongs.
university_columns = [
    'ba', 'ba2', 'ma', 'ma2', 'phd', 'mfa', 'mfa2', 'post45_hathi_entry',
]
has_entry = df[university_columns].notna()
df['university_count'] = has_entry.sum(axis=1)

# Number of grant records for each grant year.
grant_rows_per_year = df.groupby('nea_grant_year').size()
grants_per_year = grant_rows_per_year.reset_index(name='grant_count')
# Figure: histogram of the per-writer university count.
fig_university_count = px.histogram(
    data_frame=df,
    x='university_count',
    nbins=10,
    title='Distribution of University Count per Writer',
    labels=dict(university_count='Number of Universities'),
    template='plotly_dark',
).update_layout(
    # Dark grey plot and page background with white text.
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font_color='white',
)
# Figure: bar chart of how many grant records fall under each
# university count.
grants_by_university_count = (
    df.groupby('university_count')
    .size()
    .reset_index(name='grant_count')
)
university_vs_grant_labels = {
    'university_count': 'Number of Universities',
    'grant_count': 'Number of Grants',
}
fig_university_vs_grant = px.bar(
    data_frame=grants_by_university_count,
    x='university_count',
    y='grant_count',
    title='University Count vs. Grant Count',
    labels=university_vs_grant_labels,
    template='plotly_dark',
)
# Dark grey plot and page background with white text.
fig_university_vs_grant.update_layout(
    font_color='white',
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
)
# Figure: histogram of writers' ages at grant time, coloured by age group.
fig_histogram = px.histogram(
    data_frame=df,
    x='age of writer',
    color='age_group',
    nbins=20,
    title='Distribution of Writers by Age at Time of Grant',
    labels={'age of writer': 'Age of Writer'},
    template='plotly_dark',
).update_layout(
    # Dark grey plot and page background with white text.
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font_color='white',
)
# Figure: pie chart of the share of writers in each age group.
fig_pie = px.pie(
    data_frame=df,
    names='age_group',
    title='Age Group Distribution of Writers',
    template='plotly_dark',
)
# Dark grey plot and page background with white text.
fig_pie.update_layout(
    font_color='white',
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
)
# Figure: choropleth of grant counts per US state.
# 'us_state' is passed with locationmode='USA-states', which expects
# two-letter state abbreviations -- presumably what the column holds; verify
# against the CSV.
fig_map = px.choropleth(
    grants_by_state,
    locations='us_state',
    locationmode='USA-states',
    color='grant_count',
    color_continuous_scale='Viridis',
    scope='usa',
    title='Grants by State',
    # Every other figure in this dashboard uses the dark template; without
    # it the geo area keeps the default light styling on the dark page.
    template='plotly_dark',
)
# Dark grey plot and page background with white text.
fig_map.update_layout(
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font_color='white',
)
# Figure: bar chart of the ten most common hometowns.
# writers_by_hometown is ordered by descending count. Missing hometowns were
# replaced with the placeholder 'Unknown' during cleaning; exclude that
# placeholder by value rather than by dropping the first row, which is only
# correct when 'Unknown' happens to be the most frequent entry.
top_hometowns = writers_by_hometown[
    writers_by_hometown['hometown'] != 'Unknown'
].head(10)
fig_hometown = px.bar(
    top_hometowns,
    x='hometown',
    y='writer_count',
    title='Top 10 Hometowns of Writers',
    labels={
        'hometown': 'Hometown',
        'writer_count': 'Writer Count',
    },
    template='plotly_dark',
)
# Dark grey plot and page background with white text.
fig_hometown.update_layout(
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font_color='white',
)
# Figure: world map with one marker per geocoded hometown, sized by the
# number of writers from that hometown.
fig_hometown_map = px.scatter_geo(
    data_frame=writers_by_hometown_map,
    lat='latitude',
    lon='longitude',
    size='writer_count',
    hover_name='hometown',
    title='Writers by Hometown (Map)',
    template='plotly_dark',
)
# Dark land, navy ocean, natural-earth projection.
hometown_geo_style = {
    'showland': True,
    'landcolor': '#1e1e1e',
    'showocean': True,
    'oceancolor': '#000033',
    'projection_type': 'natural earth',
}
fig_hometown_map.update_layout(
    geo=hometown_geo_style,
    font_color='white',
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
)
# Figure: line chart of the number of grants per grant year.
fig_grants_per_year = px.line(
    data_frame=grants_per_year,
    x='nea_grant_year',
    y='grant_count',
    title='Grants per Year',
    labels=dict(nea_grant_year='Year', grant_count='Number of Grants'),
    template='plotly_dark',
).update_layout(
    # Dark grey plot and page background with white text.
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font_color='white',
)
# Assemble the Dash application: a paginated data grid of the full
# dataframe, followed by one graph per figure.
app = Dash()

# One grid column per dataframe column.
grid_column_defs = [{"field": column_name} for column_name in df.columns]
grid = dag.AgGrid(
    rowData=df.to_dict(orient="records"),
    columnDefs=grid_column_defs,
    dashGridOptions={"pagination": True},
)

# Figures in the order they appear on the page.
dashboard_figures = (
    fig_histogram,
    fig_pie,
    fig_university_count,
    fig_university_vs_grant,
    fig_map,
    fig_hometown,
    fig_hometown_map,
    fig_grants_per_year,
)
app.layout = html.Div(
    [grid] + [dcc.Graph(figure=figure) for figure in dashboard_figures]
)
# Start the development server only when this file is executed as a script.
# Dash.run is the current entry point; run_server is the legacy name and is
# no longer available in Dash 3.
if __name__ == "__main__":
    app.run(debug=True)