Py.Cafe

iisakkirotko/

linkedin-connection-insights

LinkedIn Connection Insights

DocsPricing
  • Connections.csv
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from vizro import Vizro
import vizro.models as vm
from vizro.models.types import capture
import pandas as pd
import vizro.plotly.express as px

import pandas as pd
import requests

# A hosted copy of the sample data exists at the URL below, but this script does
# not download it (the assignment is commented out and nothing here fetches it).
# url = "https://raw.githubusercontent.com/stichbury/vizro_projects/main/linkedin_project/Connections.csv"

# NOTE(review): within the visible code, StringIO is referenced only by the
# commented-out lines below, which look like leftovers from a download-based loader.
from io import StringIO
# csv_data = StringIO(response.text)
# csv_data = file.read()
# Load Connections.csv (path relative to the working directory) into the
# module-level DataFrame; the chart functions below read its 'Position',
# 'Company' and 'Connected On' columns.
df = pd.read_csv("Connections.csv")
    
#################################
# Data: upload a file called Connections.csv by clicking "+ NEW FILE" in the panel on your left <----
# To run this project with your own data, request an export as described at https://www.linkedin.com/help/linkedin/answer/a1339364/downloading-your-account-data
# LinkedIn will email you a zip file, which contains Connections.csv.
# Before uploading it, edit the file to remove the two lines of commentary at the top.
# Alternatively, use my fake data from here: https://github.com/stichbury/vizro_projects/blob/main/linkedin_project/Connections.csv
################################

def role_chart():
    """Build a bar chart of the 20 most frequent job titles.

    Counts the values of the 'Position' column of the module-level ``df``
    and keeps the 20 largest counts.

    Returns:
        The figure produced by ``px.bar`` with 'Position' on the x axis and
        'Frequency' on the y axis.
    """
    # value_counts() only reads the column, so df itself is left untouched.
    top_roles = df['Position'].value_counts().nlargest(20).reset_index()
    top_roles.columns = ['Position', 'Frequency']
    return px.bar(
        top_roles,
        x='Position',
        y='Frequency',
        title='20 most frequent job titles',
        labels={'Position': 'Position', 'Frequency': 'Frequency'},
    )

def company_chart():
    """Build a bar chart of the 20 companies with the most connections.

    Counts the values of the 'Company' column of the module-level ``df``
    and keeps the first 20 rows of the resulting counts.

    Returns:
        The figure produced by ``px.bar`` with 'Company' on the x axis and
        'Count' on the y axis.
    """
    # value_counts() only reads the column, so df itself is left untouched.
    top_companies = df['Company'].value_counts().head(20).reset_index()
    top_companies.columns = ['Company', 'Count']
    return px.bar(
        top_companies,
        x='Company',
        y='Count',
        title='Top 20 most connected companies',
        labels={'Company': 'Company', 'Count': 'Count'},
    )

def growth_chart():
    """Build a line chart of the running total of connections by year.

    Parses the 'Connected On' column of the module-level ``df`` as dates,
    counts rows per calendar year and accumulates those counts with
    ``cumsum``. The plotted column is labelled 'Yearly Connections' even
    though its values are cumulative totals.

    Returns:
        The figure produced by ``px.line`` with 'Year' on the x axis and
        'Yearly Connections' on the y axis.
    """
    # Work on a copy so the helper 'Year' column never leaks into df.
    connections = df.copy()
    connections['Year'] = pd.to_datetime(connections['Connected On']).dt.year

    running_total = connections.groupby('Year').size().cumsum()
    # The size() series is unnamed, so name its value column while resetting the index.
    totals_by_year = running_total.reset_index(name='Yearly Connections')

    return px.line(
        totals_by_year,
        x='Year',
        y='Yearly Connections',
        title='Yearly growth of my connections',
    )


def cumulative_chart():
    """Build a bar chart of how many connections were made in each year.

    Parses the 'Connected On' column of the module-level ``df`` as dates and
    counts rows per year-start ('YS') resampling bin. Despite the function
    name, the counts are per year; nothing is accumulated here.

    Returns:
        The figure produced by ``px.bar`` with the year-start date on the x
        axis (labelled 'Year') and the row count on the y axis.
    """
    # Work on a copy so the parsed dates never overwrite the column in df.
    data_frame = df.copy()
    data_frame['Connected On'] = pd.to_datetime(data_frame['Connected On'])

    # resample() needs a datetime index; set_index returns a new frame, so no
    # in-place mutation is required.
    yearly_counts = (
        data_frame.set_index('Connected On')
        .resample('YS')
        .size()
        .reset_index()
    )
    # The size() series is unnamed, so give both columns explicit names.
    yearly_counts.columns = ['Connected On', 'Count']

    fig = px.bar(
        yearly_counts,
        x='Connected On',
        y='Count',
        title='Connection count by year',
        labels={'Connected On': 'Year', 'Count': 'Count'},
    )
    return fig


# Single dashboard page holding the four charts defined above.
page = vm.Page(
    title="LinkedIn data",
    # Three-row, two-column grid. Per Vizro's grid convention each number is the
    # position of an entry in `components`: charts 0 and 1 sit side by side and
    # span the top two rows, charts 2 and 3 share the bottom row.
    layout=vm.Layout(grid=[[0,1], [0,1], [2,3]]),
    # Each chart function is called here, once, while the page is being defined.
    components=[
        vm.Graph(figure=role_chart()),
        vm.Graph(figure=company_chart()),
        vm.Graph(figure=growth_chart()),
        vm.Graph(figure=cumulative_chart()),
    ],
)

# Wrap the page in a dashboard, then build and start the app at module level
# (there is no `if __name__ == "__main__"` guard, so this runs on import too).
dashboard = vm.Dashboard(pages=[page])
Vizro().build(dashboard).run()