From b0aee483529c1c4227b5a87ee91f7c34fb4ab81a Mon Sep 17 00:00:00 2001
From: Russell Ballestrini <russell.ballestrini@gmail.com>
Date: Thu, 21 Nov 2024 10:16:46 -0500
Subject: [PATCH 1/2] google gemini has entered the chat.

Add Google Gemini chat models (gemini-flash, gemini-flash-8b, and
gemini-pro) by pointing the stock OpenAI client at Google's
OpenAI-compatible endpoint, and document the new GOOGLE_API_KEY
environment variable in the README.
---
 README.rst |  6 +++++-
 app.py     | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index d42a34b..cf53f3a 100644
--- a/README.rst
+++ b/README.rst
@@ -81,9 +81,10 @@ Set up optional environment variables for your AWS, OpenAI, MistralAI, or togeth
     export MISTRAL_API_KEY="your_mistralai_api_key"
     export TOGETHER_API_KEY="your_togetherai_api_key"
     export GROQ_API_KEY="your_groq_api_key"
+    export XAI_API_KEY="your_twitter_x_ai_api_key_for_grok"
+    export GOOGLE_API_KEY="your_google_gemini_api_key"
     export VLLM_API_KEY="not-needed"
     export VLLM_ENDPOINT="http://localhost:18888/v1"
-    export XAI_API_KEY="your_twitter_x_ai_api_key_for_grok"
 
 To start the application with socket.io run::
 
@@ -128,6 +129,9 @@ To interact with the various language models, you can use the following commands
 - For Groq Llama-2, send a message with ``groq/llama2`` and include your prompt.
 - For Groq Llama-3, send a message with ``groq/llama3`` and include your prompt.
 - For Groq Gemma, send a message with ``groq/gemma`` and include your prompt.
+- For Google Gemini Flash, send a message with ``gemini-flash`` and include your prompt.
+- For Google Gemini Flash 8B, send a message with ``gemini-flash-8b`` and include your prompt.
+- For Google Gemini Pro, send a message with ``gemini-pro`` and include your prompt.
 - For Twitter/X AI Grok, send a message with ``grok-beta`` and include your prompt.
 - For vLLM Hermes, send a message with ``vllm/hermes-llama-3`` and include your prompt.
 - For Dall-e-3, send a message with ``dall-e-3`` and include your prompt.
diff --git a/app.py b/app.py
index d8dfb0f..73910d6 100644
--- a/app.py
+++ b/app.py
@@ -122,6 +122,9 @@ HELP_MESSAGE = """
 - `groq/llama2`: For Groq Llama-2, send a message with `groq/llama2` and include your prompt.
 - `groq/llama3`: For Groq Llama-3, send a message with `groq/llama3` and include your prompt.
 - `groq/gemma`: For Groq Gemma, send a message with `groq/gemma` and include your prompt.
+- `gemini-flash`: For Google Gemini Flash, send a message with `gemini-flash` and include your prompt.
+- `gemini-flash-8b`: For Google Gemini Flash 8B, send a message with `gemini-flash-8b` and include your prompt.
+- `gemini-pro`: For Google Gemini Pro, send a message with `gemini-pro` and include your prompt.
 - `grok-beta`: For twitter/xai Grok, send a message with `grok-beta` and include your prompt.
 - `vllm/hermes-llama-3`: For vLLM Hermes, send a message with `vllm/hermes-llama-3` and include your prompt.
 - `dall-e-3`: For Dall-e-3, send a message with `dall-e-3` and include your prompt.
@@ -589,6 +592,7 @@ def handle_message(data):
         or "vllm/" in data["message"]
         or "groq/" in data["message"]
         or "grok-beta" in data["message"]
+        or "gemini-" in data["message"]
     ):
         # Emit a temporary message indicating that the llm is processing
         emit(
@@ -652,6 +656,27 @@ def handle_message(data):
                 room.name,
                 model_name="grok-beta",
             )
+        if "gemini-flash" in data["message"]:
+            gevent.spawn(
+                chat_gpt,
+                data["username"],
+                room.name,
+                model_name="gemini-1.5-flash-002",
+            )
+        if "gemini-flash-8b" in data["message"]:
+            gevent.spawn(
+                chat_gpt,
+                data["username"],
+                room.name,
+                model_name="gemini-1.5-flash-8b",
+            )
+        if "gemini-pro" in data["message"]:
+            gevent.spawn(
+                chat_gpt,
+                data["username"],
+                room.name,
+                model_name="gemini-1.5-pro-002",
+            )
         if "mistral-tiny" in data["message"]:
             gevent.spawn(
                 chat_mistral,
@@ -989,17 +1014,24 @@ def get_openai_client_and_model(model_name="NousResearch/Hermes-3-Llama-3.1-8B")
     vllm_endpoint = os.environ.get("VLLM_ENDPOINT")
     vllm_api_key = os.environ.get("VLLM_API_KEY", "not-needed")
     xai_api_key = os.environ.get("XAI_API_KEY")
+    google_api_key = os.environ.get("GOOGLE_API_KEY")
 
     is_openai_model = "gpt" in model_name.lower() or "o1" in model_name.lower()
     is_xai_model = "grok-" in model_name.lower()
+    is_google_model = "gemini-" in model_name.lower()
     is_vllm_model = True
-    if is_openai_model or is_xai_model:
+    if is_openai_model or is_xai_model or is_google_model:
         is_vllm_model = False
 
     if is_vllm_model:
         openai_client = OpenAI(base_url=vllm_endpoint, api_key=vllm_api_key)
     elif is_xai_model:
         openai_client = OpenAI(base_url="https://api.x.ai/v1", api_key=xai_api_key)
+    elif is_google_model:
+        openai_client = OpenAI(
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+            api_key=google_api_key,
+        )
     else:
         openai_client = OpenAI()
 
@@ -1050,6 +1082,7 @@ def chat_gpt(username, room_name, model_name="gpt-4o-mini"):
         chunks = openai_client.chat.completions.create(
             model=model_name,
             messages=chat_history,
+            n=1,
             temperature=temperature,
             stream=True,
         )
-- 
GitLab
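
A minimal standalone sketch of the Gemini wiring above, assuming the
``openai`` Python package (v1+) and a ``GOOGLE_API_KEY`` in the
environment; the endpoint URL and model ids are the ones used in the
diff::

    import os

    from openai import OpenAI

    # Chat aliases mapped to the concrete Gemini model ids from the patch.
    GEMINI_MODELS = {
        "gemini-flash": "gemini-1.5-flash-002",
        "gemini-flash-8b": "gemini-1.5-flash-8b",
        "gemini-pro": "gemini-1.5-pro-002",
    }

    def gemini_client():
        # Google exposes an OpenAI-compatible surface for Gemini, so the
        # stock OpenAI client works once base_url and api_key are swapped.
        return OpenAI(
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
            api_key=os.environ["GOOGLE_API_KEY"],
        )

    if __name__ == "__main__":
        client = gemini_client()
        chunks = client.chat.completions.create(
            model=GEMINI_MODELS["gemini-flash"],
            messages=[{"role": "user", "content": "Say hello in one sentence."}],
            n=1,  # the compat layer expects a single choice (see patch 2/2)
            stream=True,
        )
        for chunk in chunks:
            delta = chunk.choices[0].delta.content
            if delta:
                print(delta, end="", flush=True)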


From c8ffe15e66c5878f97cb2caf5caa1186c403891d Mon Sep 17 00:00:00 2001
From: Russell Ballestrini <russell.ballestrini@gmail.com>
Date: Thu, 21 Nov 2024 10:33:32 -0500
Subject: [PATCH 2/2] fix all openai_client.chat calls to have n=1 for google
 api

Google's OpenAI-compatible Gemini endpoint requires n=1 on chat
completion requests, so pass it explicitly on every remaining
openai_client.chat.completions.create call.
---
 app.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/app.py b/app.py
index 73910d6..773a351 100644
--- a/app.py
+++ b/app.py
@@ -1625,6 +1625,7 @@ def gpt_generate_room_title(messages):
         messages=chat_history,
         model=model_name,  # or any appropriate model
         max_tokens=20,
+        n=1,
     )
 
     title = response.choices[0].message.content
@@ -2822,7 +2823,11 @@ def generate_grading(chat_history, rubric):
 
     try:
         completion = openai_client.chat.completions.create(
-            model=model_name, messages=messages, max_tokens=1000, temperature=0.7
+            model=model_name,
+            messages=messages,
+            max_tokens=1000,
+            temperature=0.7,
+            n=1,
         )
         grading = completion.choices[0].message.content.strip()
         return grading
@@ -2869,6 +2874,7 @@ def categorize_response(question, response, buckets, tokens_for_ai):
         completion = openai_client.chat.completions.create(
             model=model_name,
             messages=messages,
+            n=1,
             max_tokens=10,
             temperature=0,
         )
@@ -2904,7 +2910,7 @@ def generate_ai_feedback(
 
     try:
         completion = openai_client.chat.completions.create(
-            model=model_name, messages=messages, max_tokens=1000, temperature=0.7
+            model=model_name, messages=messages, max_tokens=1000, temperature=0.7, n=1
         )
         feedback = completion.choices[0].message.content.strip()
         return feedback
@@ -2961,7 +2967,7 @@ def translate_text(text, target_language):
 
     try:
         completion = openai_client.chat.completions.create(
-            model=model_name, messages=messages, max_tokens=2000, temperature=0.7
+            model=model_name, messages=messages, max_tokens=2000, temperature=0.7, n=1
         )
         translation = completion.choices[0].message.content.strip()
         return translation
-- 
GitLab
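
A minimal sketch of the call shape this series settles on, assuming (as
the subject line implies) that Google's OpenAI-compatible endpoint
rejects chat completion requests unless ``n`` is exactly 1; the
``create_completion`` helper is hypothetical, not part of app.py::

    import os

    from openai import OpenAI

    client = OpenAI(
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
        api_key=os.environ["GOOGLE_API_KEY"],
    )

    def create_completion(**kwargs):
        # Hypothetical helper: default n=1 so one call path works for the
        # OpenAI, xAI, vLLM, and Gemini back ends alike.
        kwargs.setdefault("n", 1)
        return client.chat.completions.create(**kwargs)

    response = create_completion(
        model="gemini-1.5-flash-002",
        messages=[{"role": "user", "content": "Summarize this patch."}],
        max_tokens=50,
    )
    print(response.choices[0].message.content)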