Py.Cafe

hoerldavid/

promoter-enhancer-interactive-fig2

Interactive plot for Stumberger et al.: Nanoscale Dynamics of Enhancer-Promoter Interactions during Exit from Pluripotency https://www.biorxiv.org/content/10.1101/2025.01.20.633941v1

Docs | Pricing
  • app.py
  • fig2_revision.csv
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# NOTE: necessary to plot from pandas
import hvplot.pandas

from math import pi
import numpy as np
import pandas as pd
import panel as pn
import holoviews as hv
from bokeh.palettes import Category10
from bokeh.plotting import figure
from bokeh.transform import cumsum


def get_histogram_df(data, num_bins=25, full_cumulative=True):
    """
    Pre-compute per-group distance distributions so they can be drawn as line plots.

    Rows of ``data`` are grouped by (gene, celltype, neighbor_rank). For each
    group two long-format tables are built and stacked:

    * ``stat == 'probability'``: histogram of ``distance_um`` normalized by the
      group's total count, over ``num_bins`` equal-width bins spanning 0 to the
      maximum distance found in the whole of ``data``; reported at bin centers.
    * ``stat == 'probability_cumulative'``: cumulative probability (see
      ``full_cumulative``).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'gene', 'celltype', 'neighbor_rank' and
        'distance_um'.
    num_bins : int
        Number of histogram bins.
    full_cumulative : bool
        If True, the cumulative curve is the empirical CDF over every sorted
        distance in the group, padded with the points (0, 0) and
        (max distance, 1). If False, it is the running sum of the binned
        probabilities at the bin centers.

    Returns
    -------
    pandas.DataFrame
        Columns 'prob', 'distance_um', 'stat', 'gene', 'celltype',
        'neighbor_rank' and 'color'. 'color' is 'blue' for celltype 'naive',
        'red' for 'primed' and missing for any other celltype. An empty frame
        with these columns is returned when ``data`` has no rows or no groups.
    """

    result_columns = ['prob', 'distance_um', 'stat', 'gene', 'celltype', 'neighbor_rank', 'color']

    # nothing to bin: return an empty table instead of failing in pd.concat below
    if len(data) == 0:
        return pd.DataFrame(columns=result_columns)

    # the bin edges are shared by all groups, so compute them once up front
    max_distance = data['distance_um'].max()
    bin_edges = np.linspace(0, max_distance, num_bins + 1)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    frames = []

    for (gene, celltype, neighbor_rank), dfi in data.groupby(['gene', 'celltype', 'neighbor_rank']):

        # normalized histogram of this group's distances
        counts, _ = np.histogram(dfi['distance_um'], bins=bin_edges)
        probs = counts / counts.sum()

        df_hist = pd.DataFrame({"prob": probs, "distance_um": bin_centers})
        df_hist['stat'] = 'probability'

        if full_cumulative:  # use all values for cumulative prob.
            n = len(dfi)
            # i-th smallest distance gets cumulative probability i / n
            probs_cumulative = np.linspace(1 / n, 1, n)
            dists_table = dfi['distance_um'].sort_values().values
            # pad so that every curve starts at (0, 0) and ends at (max_distance, 1)
            probs_cumulative = np.concatenate([[0.0], probs_cumulative, [1.0]])
            dists_table = np.concatenate([[0.0], dists_table, [max_distance]])
            df_hist_cum = pd.DataFrame({"prob": probs_cumulative, "distance_um": dists_table})
        else:  # get cumulative prob. only from histogram bins
            df_hist_cum = pd.DataFrame({"prob": np.cumsum(probs), "distance_um": bin_centers})
        df_hist_cum['stat'] = 'probability_cumulative'

        # stack both statistics and label them with the group keys
        df_group = pd.concat([df_hist, df_hist_cum])
        df_group['gene'] = gene
        df_group['celltype'] = celltype
        df_group['neighbor_rank'] = neighbor_rank
        frames.append(df_group)

    # groupby can yield no groups even for non-empty data (e.g. all keys missing)
    if not frames:
        return pd.DataFrame(columns=result_columns)

    df_stats = pd.concat(frames)

    # set colors
    df_stats['color'] = df_stats['celltype'].map({'naive': 'blue', 'primed': 'red'})

    return df_stats


def _contact_count_probabilities(df_stats, gene, celltype, distance_cutoff):
    """Return a table (num_neighbors, prob) of the probability of exactly n enhancers within the cutoff."""

    # keep only the cumulative-probability rows of the requested gene and celltype
    is_selected = (
        (df_stats['celltype'] == celltype)
        & (df_stats['gene'] == gene)
        & (df_stats['stat'] == 'probability_cumulative')
    )
    df_cum = df_stats[is_selected]

    # cumulative probability for each enhancer rank to be within cutoff
    # (maximum table entry strictly below the cutoff).
    # NOTE: this is the fraction of cells with *at least* n enhancers.
    # Ranks without any table entry below the cutoff (e.g. cutoff == 0) are
    # kept with probability 0 instead of silently disappearing.
    ranks = np.sort(df_cum['neighbor_rank'].unique())
    prob_atleast_n = (
        df_cum[df_cum['distance_um'] < distance_cutoff]
        .groupby('neighbor_rank')['prob']
        .max()
        .reindex(ranks, fill_value=0.0)
        .to_numpy(dtype=float)
    )

    # convert the at-least-n probabilities to exactly-n:
    # prob(0) = 1 - prob(>=1), prob(1) = prob(>=1) - prob(>=2), ...,
    # prob(N) = prob(>=N) - 0 for the highest rank N
    prob_exactly_n = (
        np.concatenate([[1.0], prob_atleast_n])
        - np.concatenate([prob_atleast_n, [0.0]])
    )

    return pd.DataFrame({
        'num_neighbors': np.arange(len(prob_exactly_n)),
        'prob': prob_exactly_n,
    })


def get_piechart_num_contacts(df_stats, gene, celltype, distance_cutoff):

    """
    Get a Bokeh pie chart of the fraction of promoters with 0, 1, 2, ... enhancers within a certain distance.
    Following https://docs.bokeh.org/en/latest/docs/user_guide/topics/pie.html

    Parameters
    ----------
    df_stats : pandas.DataFrame
        Long-format statistics table with the columns 'gene', 'celltype',
        'stat', 'neighbor_rank', 'distance_um' and 'prob'; only rows with
        ``stat == 'probability_cumulative'`` are used.
    gene, celltype
        Values selecting the rows of ``df_stats`` to summarize. ``celltype``
        is also used as the plot title.
    distance_cutoff : float
        Only table entries with ``distance_um`` strictly below this value count.

    Returns
    -------
    panel.pane.Bokeh
        Pane wrapping the pie chart, one wedge per number of neighbors.
    """

    df_prob_exactly_n = _contact_count_probabilities(df_stats, gene, celltype, distance_cutoff)

    # probabilities to fraction of circle
    df_prob_exactly_n['angle'] = df_prob_exactly_n['prob']/df_prob_exactly_n['prob'].sum() * 2*pi

    # set colors from palette.
    # Category10 is keyed by palette size and has no entry for every possible
    # number of wedges (e.g. more than 10), so take the 10-color palette and
    # slice / cycle it to the required length.
    base_palette = Category10[10]
    num_wedges = len(df_prob_exactly_n)
    df_prob_exactly_n['color'] = [base_palette[k % len(base_palette)] for k in range(num_wedges)]

    p = figure(height=250, width=300, title=f"{celltype}", toolbar_location=None,
            tools="hover", tooltips="@num_neighbors: @prob", x_range=(-0.5, 1.0))
    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='num_neighbors', source=df_prob_exactly_n)

    # a pie chart needs neither axes nor grid
    p.axis.axis_label=None
    p.axis.visible=False
    p.grid.grid_line_color = None

    bokeh_pane = pn.pane.Bokeh(p, theme="dark_minimal")
    return bokeh_pane


def get_histogram_plot(df_stats, gene, vline_pos, cumulative=True):
    """
    Build the distance-distribution line plot for one gene.

    Rows of ``df_stats`` for ``gene`` are plotted as 'prob' over 'distance_um',
    one line per (celltype, neighbor_rank), colored by the 'color' column.
    The cumulative statistic is shown if ``cumulative`` is true, the plain
    probability otherwise. A vertical line is overlaid at ``vline_pos``.
    Returns the overlay wrapped in a centered HoloViews pane.
    """

    # which of the two pre-computed statistics to show
    if cumulative:
        wanted_stat = 'probability_cumulative'
    else:
        wanted_stat = 'probability'

    # restrict the table to the gene of interest and the chosen statistic
    matches_gene = df_stats.gene == gene
    matches_stat = df_stats.stat == wanted_stat
    subset = df_stats[matches_gene & matches_stat]

    # one line per celltype / neighbor rank combination
    curves = subset.hvplot(
        'distance_um',
        'prob',
        label="Distance distribution by neighbor rank",
        height=500,
        width=700,
        by=['celltype', 'neighbor_rank'],
        color='color',
    )

    # marker for the chosen distance cutoff
    cutoff_line = hv.VLine(vline_pos)

    return pn.pane.HoloViews(curves * cutoff_line, align='center')


# load data
data = pd.read_csv('fig2_revision.csv')

# attach neighbor rank: order rows by distance within each fov, then number
# them 1, 2, ... per fov. cumcount aligns on the row index, so the ranks do not
# depend on the group iteration order matching the row order.
data = data.sort_values(['fov', 'distance_um'])
data['neighbor_rank'] = data.groupby('fov').cumcount() + 1

pn.extension(design='material')

# widgets for interactive plot settings
cumulative = pn.widgets.Checkbox(name='Cumulative Histogram?', value=True)
# sorted so the dropdown order is the same on every run (set order is not)
gene_options = sorted(data['gene'].dropna().unique())
gene = pn.widgets.Select(name='Gene', options=gene_options, value='Nanog')
num_bins = pn.widgets.IntInput(name='Num. Bins', value=30, start=10, end=80, step=5)
distance_cutoff = pn.widgets.FloatSlider(name='Distance cutoff [µm]', value=0.25, start=0, end=1.5, step=0.01)

# re-calculate histogram df if num_bins changes
df_stats = pn.bind(get_histogram_df, data, num_bins)

# histogram plot and pie charts for naive, primed
hist_plot = pn.bind(get_histogram_plot, df_stats, gene, distance_cutoff, cumulative)
pie_naive = pn.bind(get_piechart_num_contacts, df_stats, gene, "naive", distance_cutoff)
pie_primed = pn.bind(get_piechart_num_contacts, df_stats, gene, "primed", distance_cutoff)

# layout: options on the left, histogram in the middle, pie charts on the right
options = pn.WidgetBox("Plot Options", gene, distance_cutoff, cumulative, num_bins, align='center')
pn.Row(options, hist_plot, pn.Column("Fraction of cells with n contacts", pie_naive, pie_primed, align='center'), align='center').servable()