Py.Cafe: maartenbreddels/chatbot-ollama

GPT-4 Chatbot with OpenAI Streaming

  • app.py
  • requirements.txt (contents sketched below)
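
The contents of requirements.txt are not shown on the page; a minimal version matching the imports in app.py would presumably be:

solara
openai
typing_extensions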
app.py
"""
# Chatbot

A chatbot that uses OpenAI's client API to talk to your locally running Ollama server.

On macOS, make sure you run:
launchctl setenv OLLAMA_ORIGINS "*"

See https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-configure-ollama-server for details.

"""

import os
from typing import List, cast

from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam
from typing_extensions import TypedDict

import solara
import solara.lab


class MessageDict(TypedDict):
    role: str  # "user" or "assistant"
    content: str


messages: solara.Reactive[List[MessageDict]] = solara.reactive([])
model = solara.reactive("llama2-uncensored")  # any model you have pulled into your local Ollama

openai = AsyncOpenAI(
    base_url="http://localhost:11434/v1/",
    api_key="ollama",  # required by the client, but ignored by the Ollama server
)
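
# Note: with the hard-coded client above, `openai` is never None, so the guard
# in prompt_ai below never fires. It is a leftover from the hosted-OpenAI
# version of this app, where the client would only be created if a key was
# present, e.g. (a sketch, not part of the original):
#
#   openai = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"]) if "OPENAI_API_KEY" in os.environ else None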


def no_api_key_message():
    messages.value = [
        {
            "role": "assistant",
            "content": "No OpenAI API key found. Please set your OpenAI API key in the environment variable `OPENAI_API_KEY`.",
        },
    ]


@solara.lab.task
async def prompt_ai(message: str):
    if openai is None:
        no_api_key_message()
        return

    messages.value = [
        *messages.value,
        {"role": "user", "content": message},
    ]
    # The part below can be replaced with a call to your own model backend
    response = await openai.chat.completions.create(
        model=model.value,
        # our MessageDict is compatible with the OpenAI types
        messages=cast(List[ChatCompletionMessageParam], messages.value),
        stream=True,
    )
    # start with an empty reply message, so we render an empty message in the chat
    # while the AI is thinking
    messages.value = [*messages.value, {"role": "assistant", "content": ""}]
    # and update it with the response
    async for chunk in response:
        if chunk.choices[0].finish_reason == "stop":  # type: ignore
            return
        # extract the new text from this streaming chunk
        delta = chunk.choices[0].delta.content
        assert delta is not None
        updated_message: MessageDict = {
            "role": "assistant",
            "content": messages.value[-1]["content"] + delta,
        }
        # replace the last message element with the appended content
        # which will update the UI
        messages.value = [*messages.value[:-1], updated_message]


@solara.component
def Page():
    solara.InputText("ollama model", model)
    with solara.Column(
        style={"width": "100%", "height": "50vh"},
    ):
        with solara.lab.ChatBox():
            for item in messages.value:
                with solara.lab.ChatMessage(
                    user=item["role"] == "user",
                    avatar=False,
                    name=model.value if item["role"] == "assistant" else "User",
                    color="rgba(0,0,0, 0.06)" if item["role"] == "assistant" else "#ff991f",
                    avatar_background_color="primary" if item["role"] == "assistant" else None,
                    border_radius="20px",
                ):
                    solara.Markdown(item["content"])
        if prompt_ai.pending:
            solara.Text("I'm thinking...", style={"font-size": "1rem", "padding-left": "20px"})
            solara.ProgressLinear()
        solara.lab.ChatInput(send_callback=prompt_ai, disabled=prompt_ai.pending)
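
To try this locally, pull the model and make sure the Ollama server is running before launching the app; a typical sequence (assuming Ollama and the packages from requirements.txt are installed) would be:

ollama pull llama2-uncensored
launchctl setenv OLLAMA_ORIGINS "*"  # macOS only, see the docstring above
solara run app.py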