From 8e2671a26510dd1938601e0086152b2d0775479e Mon Sep 17 00:00:00 2001
From: Splice86 <davidbalaun@hotmail.com>
Date: Fri, 10 Nov 2023 01:37:24 -0600
Subject: [PATCH] Update README and switch /generate-text to prompt-based requests

---
 .idea/.gitignore                              |  3 ++
 .idea/inspectionProfiles/Project_Default.xml  | 20 +++++++++++
 .../inspectionProfiles/profiles_settings.xml  |  6 ++++
 .idea/misc.xml                                |  4 +++
 .idea/modules.xml                             |  8 +++++
 .idea/tabbyAPI.iml                            | 10 ++++++
 .idea/vcs.xml                                 |  6 ++++
 README.md                                     | 20 +++++++----
 llm.py                                        | 19 +++++++++--
 main.py                                       | 33 ++++++++-----------
 10 files changed, 101 insertions(+), 28 deletions(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/inspectionProfiles/Project_Default.xml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/tabbyAPI.iml
 create mode 100644 .idea/vcs.xml
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..d378e48
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,20 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="7">
+            <item index="0" class="java.lang.String" itemvalue="scipy" />
+            <item index="1" class="java.lang.String" itemvalue="transformers" />
+            <item index="2" class="java.lang.String" itemvalue="sounddevice" />
+            <item index="3" class="java.lang.String" itemvalue="matplotlib" />
+            <item index="4" class="java.lang.String" itemvalue="librosa" />
+            <item index="5" class="java.lang.String" itemvalue="torch" />
+            <item index="6" class="java.lang.String" itemvalue="flask" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..6931d08
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (tabbyAPI)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..f9dcddc
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/tabbyAPI.iml" filepath="$PROJECT_DIR$/.idea/tabbyAPI.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/tabbyAPI.iml b/.idea/tabbyAPI.iml
new file mode 100644
index 0000000..74d515a
--- /dev/null
+++ b/.idea/tabbyAPI.iml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/README.md b/README.md
index 1fcb2ce..bd7ca42 100644
--- a/README.md
+++ b/README.md
@@ -74,13 +74,19 @@ The tabbyAPI application provides the following endpoint:
 ### Example Request (using `curl`)
 
 
-curl -X POST "http://localhost:8000/generate-text" -H "Content-Type: application/json" -d '{
-    "model": "your_model_name",
-    "messages": [
-        {"role": "user", "content": "Say this is a test!"}
-    ],
-    "temperature": 0.7
-}'
+curl http://127.0.0.1:8000/generate-text \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Your_Model_Path",
+    "prompt": "A tabby is a",
+    "max_tokens": 200,
+    "temperature": 1,
+    "top_p": 0.9,
+    "seed": 10,
+    "stream": false,
+    "token_repetition_penalty": 0.5,
+    "stop": ["###"]
+  }'
 
 
 ### Parameter Guide
diff --git a/llm.py b/llm.py
index b0a2336..19167a3 100644
--- a/llm.py
+++ b/llm.py
@@ -11,6 +11,8 @@ from exllamav2.generator import (
     ExLlamaV2Sampler
 )
 import time
+
+
 class ModelManager:
     def __init__(self, model_directory: str = None):
         if model_directory is None:
@@ -24,12 +26,25 @@ class ModelManager:
         self.model.load_autosplit(self.cache)
         self.tokenizer = ExLlamaV2Tokenizer(self.config)
         self.generator = ExLlamaV2BaseGenerator(self.model, self.cache, self.tokenizer)
-    def generate_text(self, prompt: str, max_new_tokens: int = 150,seed: int = random.randint(0,999999) ):
+
+    def generate_text(self,
+                      prompt: str,
+                      max_tokens: int = 150,
+                      temperature: float = 0.5,
+                      seed: int = None,
+                      token_repetition_penalty: float = 1.0,
+                      stop: list = None):
         try:
             self.generator.warmup()
             time_begin = time.time()
+            seed = random.randint(0, 999999) if seed is None else seed  # drawn per call, not once at import
+            settings = ExLlamaV2Sampler.Settings()
+            settings.temperature = temperature  # previously accepted but never applied
+            settings.token_repetition_penalty = token_repetition_penalty
+            if stop:
+                settings.stop_sequence = stop  # NOTE(review): confirm the sampler reads this attribute
             output = self.generator.generate_simple(
-                prompt, ExLlamaV2Sampler.Settings(), max_new_tokens, seed=seed
+                prompt, settings, max_tokens, seed=seed
             )
             time_end = time.time()
             time_total = time_end - time_begin
diff --git a/main.py b/main.py
index 1e0e66a..7efab37 100644
--- a/main.py
+++ b/main.py
@@ -7,13 +7,19 @@ from uvicorn import run
 app = FastAPI()
 
 # Initialize the modelManager with a default model path
-default_model_path = "~/Models/SynthIA-7B-v2.0-5.0bpw-h6-exl2"
+default_model_path = "/home/david/Models/SynthIA-7B-v2.0-5.0bpw-h6-exl2"  # NOTE(review): machine-specific path
 modelManager = ModelManager(default_model_path)
-
+
 class TextRequest(BaseModel):
-    model: str
-    messages: list[dict]
-    temperature: float
+    model: str = None  # Optional; the request handler in this patch does not read it
+    prompt: str  # Text handed to the generator (replaces the former "messages" list)
+    max_tokens: int = 200
+    temperature: float = 1
+    top_p: float = 0.9  # No matching parameter on ModelManager.generate_text
+    seed: int = 10
+    stream: bool = False  # No matching parameter on ModelManager.generate_text
+    token_repetition_penalty: float = 1.0
+    stop: list = None  # Optional stop strings, e.g. ["###"]
 
 class TextResponse(BaseModel):
     response: str
@@ -23,20 +29,9 @@ class TextResponse(BaseModel):
 def generate_text(request: TextRequest):
     global modelManager
     try:
-        model_path = request.model
-
-        if model_path and model_path != modelManager.config.model_path:
-            # Check if the specified model path exists
-            if not os.path.exists(model_path):
-                raise HTTPException(status_code=400, detail="Model path does not exist")
-
-            # Reinitialize the modelManager with the new model path
-            modelManager = ModelManager(model_path)
-
-        messages = request.messages
-        user_message = next(msg["content"] for msg in messages if msg["role"] == "user")
-
-        output, generation_time = modelManager.generate_text(user_message)
+        output, generation_time = modelManager.generate_text(  # generate_text has no top_p/stream parameters
+            prompt=request.prompt, max_tokens=request.max_tokens, temperature=request.temperature,
+            seed=request.seed, token_repetition_penalty=request.token_repetition_penalty, stop=request.stop)
         return {"response": output, "generation_time": generation_time}
     except RuntimeError as e:
         raise HTTPException(status_code=500, detail=str(e))