Py.Cafe

iisakkirotko/

chatbot-ollama

Deepseek Chatbot with Ollama Streaming

DocsPricing
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from typing import List, cast

from typing_extensions import TypedDict

from ollama import AsyncClient

import solara
import solara.lab

##################################################################
#
#   IMPORTANT!
#
#   Before running this demo, you should:
#
#   1. Install Ollama from https://ollama.com/download
#   2. Pull the model from the repository with
#
#       ollama pull deepseek-r1:8b
#
#   3. Run the AI model locally by running
#
#       OLLAMA_ORIGINS="*" ollama serve
#
##################################################################

class MessageDict(TypedDict):
    """One chat message as stored in the reactive conversation list.

    The dict shape is passed directly to the Ollama chat API call.
    """

    role: str  # "user" or "assistant"
    content: str  # visible message text (streamed in incrementally for the assistant)
    chain_of_reason: str | None  # text streamed between <think> markers; None for user messages


# Reactive conversation history; reassigning .value triggers a UI re-render.
messages: solara.Reactive[List[MessageDict]] = solara.reactive([])

# Client for the locally running Ollama server (its default port is 11434).
ai_client = AsyncClient(
    host="http://localhost:11434/",
)

@solara.lab.task
async def promt_ai(message: str):
    """Send *message* to the local Ollama model and stream the reply into `messages`.

    Runs as a solara task so the UI stays responsive while streaming.
    The deepseek model emits `<think>` / `</think>` marker deltas; text
    between them is accumulated into `chain_of_reason` instead of the
    visible `content`.
    """
    # True while the stream is between <think> and </think> marker deltas
    thinking = False

    messages.value = [
        *messages.value,
        {"role": "user", "content": message, "chain_of_reason": None},
    ]
    # The part below can be replaced with a call to your own LLM service
    response = ai_client.chat(
        model="deepseek-r1:8b",
        # our MessageDict is compatible with the Ollama chat message format
        messages=messages.value,
        stream=True,
    )
    # start with an empty reply message, so we render an empty message in the chat
    # while the AI is thinking
    messages.value = [*messages.value, {"role": "assistant", "content": "", "chain_of_reason": None}]
    # `chat(..., stream=True)` must be awaited to obtain the async iterator of chunks
    async for chunk in await response:
        # stop once the model reports normal completion
        if chunk["done"] and chunk["done_reason"] == "stop":
            return
        delta = chunk["message"]["content"]
        # marker deltas toggle routing between chain-of-reason and visible content;
        # assumes markers arrive as whole, standalone deltas — TODO confirm for other models
        if "<think>" == delta:
            thinking = True
            continue
        if "</think>" == delta:
            thinking = False
            continue
        assert delta is not None
        message_content = messages.value[-1]["content"]
        chain_of_reason = messages.value[-1]["chain_of_reason"] or ""
        if thinking:
            chain_of_reason += delta
        else:
            message_content += delta
        updated_message: MessageDict = {
            "role": "assistant",
            "content": message_content,
            "chain_of_reason": chain_of_reason,
        }
        # replace the last message element with the appended content
        # which will update the UI
        messages.value = [*messages.value[:-1], updated_message]


@solara.component
def Page():
    """Render the chat UI: message history, a busy indicator, and the input box."""
    # Keep the layout short while the conversation is empty; otherwise fill
    # the page minus the input row.
    column_height = "50vh" if not messages.value else "calc(100% - 44px)"
    with solara.Column(style={"width": "100%", "height": column_height}):
        with solara.lab.ChatBox():
            for msg in messages.value:
                is_user = msg["role"] == "user"
                is_assistant = msg["role"] == "assistant"
                with solara.lab.ChatMessage(
                    user=is_user,
                    avatar=False,
                    name="Deepseek" if is_assistant else "User",
                    color="rgba(0,0,0, 0.06)" if is_assistant else "#ff991f",
                    avatar_background_color="primary" if is_assistant else None,
                    border_radius="20px",
                ):
                    # Collapsible chain-of-thought section, shown only for
                    # assistant replies that streamed <think> content.
                    if msg["chain_of_reason"] is not None:
                        with solara.Details(summary="Chain of Thought"):
                            solara.Markdown(msg["chain_of_reason"])
                    solara.Markdown(msg["content"])
        if promt_ai.pending:
            solara.Text("I'm thinking...", style={"font-size": "1rem", "padding-left": "20px"})
            solara.ProgressLinear()
        # .key("input") keeps the ChatInput instance stable across re-renders,
        # so text the user is typing is not lost.
        solara.lab.ChatInput(send_callback=promt_ai, disabled=promt_ai.pending).key("input")