From b0aee483529c1c4227b5a87ee91f7c34fb4ab81a Mon Sep 17 00:00:00 2001 From: Russell Ballestrini <russell.ballestrini@gmail.com> Date: Thu, 21 Nov 2024 10:16:46 -0500 Subject: [PATCH 1/2] google gemini has entered the chat. modified: README.rst modified: app.py --- README.rst | 6 +++++- app.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index d42a34b..cf53f3a 100644 --- a/README.rst +++ b/README.rst @@ -81,9 +81,10 @@ Set up optional environment variables for your AWS, OpenAI, MistralAI, or togeth export MISTRAL_API_KEY="your_mistralai_api_key" export TOGETHER_API_KEY="your_togetherai_api_key" export GROQ_API_KEY="your_groq_api_key" + export XAI_API_KEY="your_twitter_x_ai_api_key_for_grok" + export GOOGLE_API_KEY="your_google_gemini_api_key" export VLLM_API_KEY="not-needed" export VLLM_ENDPOINT="http://localhost:18888/v1" - export XAI_API_KEY="your_twitter_x_ai_api_key_for_grok" To start the application with socket.io run:: @@ -128,6 +129,9 @@ To interact with the various language models, you can use the following commands - For Groq Llama-2, send a message with ``groq/llama2`` and include your prompt. - For Groq Llama-3, send a message with ``groq/llama3`` and include your prompt. - For Groq Gemma, send a message with ``groq/gemma`` and include your prompt. +- For Google Gemini Flash, send a message with ``gemini-flash`` and include your prompt. +- For Google Gemini Flash 8B, send a message with ``gemini-flash-8b`` and include your prompt. +- For Google Gemini Pro, send a message with ``gemini-pro`` and include your prompt. - For Twitter/X AI Grok, send a message with ``grok-beta`` and include your prompt. - For vLLM Hermes, send a message with ``vllm/hermes-llama-3`` and include your prompt. - For Dall-e-3, send a message with ``dall-e-3`` and include your prompt. diff --git a/app.py b/app.py index d8dfb0f..73910d6 100644 --- a/app.py +++ b/app.py @@ -122,6 +122,9 @@ HELP_MESSAGE = """ - `groq/llama2`: For Groq Llama-2, send a message with `groq/llama2` and include your prompt. - `groq/llama3`: For Groq Llama-3, send a message with `groq/llama3` and include your prompt. - `groq/gemma`: For Groq Gemma, send a message with `groq/gemma` and include your prompt. +- `gemini-flash`: For Google Gemini Flash, send a message with `gemini-flash` and include your prompt. +- `gemini-flash-8b`: For Google Gemini Flash 8B, send a message with `gemini-flash-8b` and include your prompt. +- `gemini-pro`: For Google Gemini Pro, send a message with `gemini-pro` and include your prompt. - `grok-beta`: For twitter/xai Grok, send a message with `grok-beta` and include your prompt. - `vllm/hermes-llama-3`: For vLLM Hermes, send a message with `vllm/hermes-llama-3` and include your prompt. - `dall-e-3`: For Dall-e-3, send a message with `dall-e-3` and include your prompt. @@ -589,6 +592,7 @@ def handle_message(data): or "vllm/" in data["message"] or "groq/" in data["message"] or "grok-beta" in data["message"] + or "gemini-" in data["message"] ): # Emit a temporary message indicating that the llm is processing emit( @@ -652,6 +656,27 @@ def handle_message(data): room.name, model_name="grok-beta", ) + if "gemini-flash" in data["message"]: + gevent.spawn( + chat_gpt, + data["username"], + room.name, + model_name="gemini-1.5-flash-002", + ) + if "gemini-flash-8b" in data["message"]: + gevent.spawn( + chat_gpt, + data["username"], + room.name, + model_name="gemini-1.5-flash-8b", + ) + if "gemini-pro" in data["message"]: + gevent.spawn( + chat_gpt, + data["username"], + room.name, + model_name="gemini-1.5-pro-002", + ) if "mistral-tiny" in data["message"]: gevent.spawn( chat_mistral, @@ -989,17 +1014,24 @@ def get_openai_client_and_model(model_name="NousResearch/Hermes-3-Llama-3.1-8B") vllm_endpoint = os.environ.get("VLLM_ENDPOINT") vllm_api_key = os.environ.get("VLLM_API_KEY", "not-needed") xai_api_key = os.environ.get("XAI_API_KEY") + google_api_key = os.environ.get("GOOGLE_API_KEY") is_openai_model = "gpt" in model_name.lower() or "o1" in model_name.lower() is_xai_model = "grok-" in model_name.lower() + is_google_model = "gemini-" in model_name.lower() is_vllm_model = True - if is_openai_model or is_xai_model: + if is_openai_model or is_xai_model or is_google_model: is_vllm_model = False if is_vllm_model: openai_client = OpenAI(base_url=vllm_endpoint, api_key=vllm_api_key) elif is_xai_model: openai_client = OpenAI(base_url="https://api.x.ai/v1", api_key=xai_api_key) + elif is_google_model: + openai_client = OpenAI( + base_url="https://generativelanguage.googleapis.com/v1beta/openai/", + api_key=google_api_key, + ) else: openai_client = OpenAI() @@ -1050,6 +1082,7 @@ def chat_gpt(username, room_name, model_name="gpt-4o-mini"): chunks = openai_client.chat.completions.create( model=model_name, messages=chat_history, + n=1, temperature=temperature, stream=True, ) -- GitLab From c8ffe15e66c5878f97cb2caf5caa1186c403891d Mon Sep 17 00:00:00 2001 From: Russell Ballestrini <russell.ballestrini@gmail.com> Date: Thu, 21 Nov 2024 10:33:32 -0500 Subject: [PATCH 2/2] fix all openai_client.chat calls to have n=1 for google api modified: app.py --- app.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index 73910d6..773a351 100644 --- a/app.py +++ b/app.py @@ -1625,6 +1625,7 @@ def gpt_generate_room_title(messages): messages=chat_history, model=model_name, # or any appropriate model max_tokens=20, + n=1, ) title = response.choices[0].message.content @@ -2822,7 +2823,11 @@ def generate_grading(chat_history, rubric): try: completion = openai_client.chat.completions.create( - model=model_name, messages=messages, max_tokens=1000, temperature=0.7 + model=model_name, + messages=messages, + max_tokens=1000, + temperature=0.7, + n=1, ) grading = completion.choices[0].message.content.strip() return grading @@ -2869,6 +2874,7 @@ def categorize_response(question, response, buckets, tokens_for_ai): completion = openai_client.chat.completions.create( model=model_name, messages=messages, + n=1, max_tokens=10, temperature=0, ) @@ -2904,7 +2910,7 @@ def generate_ai_feedback( try: completion = openai_client.chat.completions.create( - model=model_name, messages=messages, max_tokens=1000, temperature=0.7 + model=model_name, messages=messages, max_tokens=1000, temperature=0.7, n=1 ) feedback = completion.choices[0].message.content.strip() return feedback @@ -2961,7 +2967,7 @@ def translate_text(text, target_language): try: completion = openai_client.chat.completions.create( - model=model_name, messages=messages, max_tokens=2000, temperature=0.7 + model=model_name, messages=messages, max_tokens=2000, temperature=0.7, n=1 ) translation = completion.choices[0].message.content.strip() return translation -- GitLab