import pandas as pd
import umap
# Debug output: the attribute listing of the imported `umap` package followed
# by its package search path (presumably to confirm which install was picked up).
print(dir(umap), umap.__path__, sep="\n")
from umap import UMAP
from sklearn.pipeline import make_pipeline
# NOTE: embetter seems impossible to install, so the SentenceEncoder lines below stay disabled.
#   pip install "embetter[text]"
# from embetter.text import SentenceEncoder
# Build a sentence encoder pipeline with UMAP at the end.
# print(SentenceEncoder)
# enc = SentenceEncoder('all-MiniLM-L6-v2')
# Build the 2-D layout shown in the bulk text explorer:
#   raw sentences -> TF-IDF features -> UMAP projection -> DataFrame(text, x, y)
#
# UMAP only accepts numeric input, so raw strings cannot be fed to it directly.
# With the SentenceEncoder unavailable, a TF-IDF vectorizer from scikit-learn
# (already a dependency of this file) turns the text into a sparse numeric
# matrix, which UMAP can consume.
from sklearn.feature_extraction.text import TfidfVectorizer

# Named `reducer` rather than `umap` so the imported `umap` module is not shadowed.
reducer = UMAP()
text_emb_pipeline = make_pipeline(
    TfidfVectorizer(),
    reducer,
)

# Load sentences. Rows with a missing "text" value are dropped and the rest are
# coerced to str, because the vectorizer rejects NaN / non-string documents.
sentences = pd.read_csv("text.csv")["text"].dropna().astype(str).tolist()

# Calculate the 2-D coordinates for every sentence.
X_tfm = text_emb_pipeline.fit_transform(sentences)

# Assemble the frame handed to the widget. Note! Text column must be named "text".
df = pd.DataFrame(
    {
        "text": sentences,
        "x": X_tfm[:, 0],
        "y": X_tfm[:, 1],
    }
)

from bulk.widgets import BaseTextExplorer

widget = BaseTextExplorer(df)
# Module-level alias for the widget (kept under the name `page` as in the original).
page = widget
# TODO: could the following transformers.js-based approach be used for the SentenceEncoder?
# import solara
# import numpy as np
# clicks = solara.reactive(0)
# input = solara.reactive("I love transformers")
# from transformers_js_py import import_transformers_js
# @solara.lab.task
# async def run(input):
# transformers = await import_transformers_js()
# pipeline = transformers.pipeline
# # Allocate a pipeline for sentiment-analysis
# pipe = await pipeline("feature-extraction")
# out = await pipe(input)
# # [{'label': 'POSITIVE', 'score': 0.999817686}]
# return np.array(out.tolist()).shape
# @solara.component
# def Page():
# if run.error:
# solara.Error(repr(run.exception))
# with solara.Card("Sentiment analysis"):
# solara.ProgressLinear(run.pending)
# with solara.Div():
# solara.InputText(label="Input", value=input)
# solara.Button(label=f"Analyze sentiment", on_click=lambda: run(input.value), color="primary", filled=True)
# if run.finished:
# solara.Text(repr(run.value))