community[patch]: Fix sparkllm embeddings api bug. (#19122)

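- **Description:** Fix a bug in the SparkLLM embeddings API integration: each input text is now sent in its own request, wrapped in the `messages` payload, and the notebook example is updated to match. @baskaryan PTAL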
langchain-ai · Apr 26, 2024 · 1994040 · 1994040
1 parent 42f6d7c
commit 1994040
Show file tree

Hide file tree

Showing 2 changed files with 72 additions and 35 deletions.
diff --git a/docs/docs/integrations/text_embedding/sparkllm.ipynb b/docs/docs/integrations/text_embedding/sparkllm.ipynb
@@ -25,14 +25,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-03-15T09:36:13.753824100Z",
+     "start_time": "2024-03-15T09:36:13.225834400Z"
+    }
+   },
    "outputs": [],
    "source": [
     "from langchain_community.embeddings import SparkLLMTextEmbeddings\n",
     "\n",
     "embeddings = SparkLLMTextEmbeddings(\n",
-    "    spark_app_id=\"sk-*\", spark_api_key=\"\", spark_api_secret=\"\"\n",
+    "    spark_app_id=\"<spark_app_id>\",\n",
+    "    spark_api_key=\"<spark_api_key>\",\n",
+    "    spark_api_secret=\"<spark_api_secret>\",\n",
     ")"
    ]
   },
@@ -45,44 +52,67 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-03-15T09:36:25.436201400Z",
+     "start_time": "2024-03-15T09:36:25.313456600Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "[-0.043609619140625,\n 0.2017822265625,\n 0.0270843505859375,\n -0.250244140625,\n -0.024993896484375,\n -0.0382080078125,\n 0.06207275390625,\n -0.0146331787109375]"
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "import os\n",
+    "text_q = \"Introducing iFlytek\"\n",
     "\n",
-    "os.environ[\"SPARK_APP_ID\"] = \"YOUR_APP_ID\"\n",
-    "os.environ[\"SPARK_API_KEY\"] = \"YOUR_API_KEY\"\n",
-    "os.environ[\"SPARK_API_SECRET\"] = \"YOUR_API_SECRET\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "text_1 = \"iFLYTEK is a well-known intelligent speech and artificial intelligence publicly listed company in the Asia-Pacific Region. Since its establishment, the company is devoted to cornerstone technological research in speech and languages, natural language understanding, machine learning, machine reasoning, adaptive learning, and has maintained the world-leading position in those domains. The company actively promotes the development of A.I. products and their sector-based applications, with visions of enabling machines to listen and speak, understand and think, creating a better world with artificial intelligence.\"\n",
-    "text_2 = \"iFLYTEK Open Platform was launched in 2010 by iFLYTEK as China’s first Artificial Intelligence open platform for Mobile Internet and intelligent hardware developers.\"\n",
+    "text_1 = \"Science and Technology Innovation Company Limited, commonly known as iFlytek, is a leading Chinese technology company specializing in speech recognition, natural language processing, and artificial intelligence. With a rich history and remarkable achievements, iFlytek has emerged as a frontrunner in the field of intelligent speech and language technologies.iFlytek has made significant contributions to the advancement of human-computer interaction through its cutting-edge innovations. Their advanced speech recognition technology has not only improved the accuracy and efficiency of voice input systems but has also enabled seamless integration of voice commands into various applications and devices.The company's commitment to research and development has been instrumental in its success. iFlytek invests heavily in fostering talent and collaboration with academic institutions, resulting in groundbreaking advancements in speech synthesis and machine translation. Their dedication to innovation has not only transformed the way we communicate but has also enhanced accessibility for individuals with disabilities.\"\n",
+    "\n",
+    "text_2 = \"Moreover, iFlytek's impact extends beyond domestic boundaries, as they actively promote international cooperation and collaboration in the field of artificial intelligence. They have consistently participated in global competitions and contributed to the development of international standards.In recognition of their achievements, iFlytek has received numerous accolades and awards both domestically and internationally. Their contributions have revolutionized the way we interact with technology and have paved the way for a future where voice-based interfaces play a vital role.Overall, iFlytek is a trailblazer in the field of intelligent speech and language technologies, and their commitment to innovation and excellence deserves commendation.\"\n",
     "\n",
-    "query_result = embeddings.embed_query(text_2)\n",
-    "query_result"
+    "query_result = embeddings.embed_query(text_q)\n",
+    "query_result[:8]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-03-15T09:36:54.657224Z",
+     "start_time": "2024-03-15T09:36:54.404690400Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "[-0.161865234375,\n 0.58984375,\n 0.998046875,\n 0.365966796875,\n 0.72900390625,\n 0.6015625,\n -0.8408203125,\n -0.2666015625]"
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "doc_result = embeddings.embed_documents([text_1, text_2])\n",
-    "doc_result"
+    "doc_result[0][:8]"
    ]
   }
  ],
  "metadata": {
   "language_info": {
    "name": "python"
+  },
+  "kernelspec": {
+   "name": "python3",
+   "language": "python",
+   "display_name": "Python 3 (ipykernel)"
   }
  },
  "nbformat": 4,

diff --git a/libs/community/langchain_community/embeddings/sparkllm.py b/libs/community/langchain_community/embeddings/sparkllm.py
@@ -70,14 +70,21 @@ def _embed(self, texts: List[str], host: str) -> Optional[List[List[float]]]:
             api_key=self.spark_api_key.get_secret_value(),
             api_secret=self.spark_api_secret.get_secret_value(),
         )
-        content = self._get_body(self.spark_app_id.get_secret_value(), texts)
-        response = requests.post(
-            url, json=content, headers={"content-type": "application/json"}
-        ).text
-        res_arr = self._parser_message(response)
-        if res_arr is not None:
-            return res_arr.tolist()
-        return None
+        embed_result: list = []
+        for text in texts:
+            query_context = {"messages": [{"content": text, "role": "user"}]}
+            content = self._get_body(
+                self.spark_app_id.get_secret_value(), query_context
+            )
+            response = requests.post(
+                url, json=content, headers={"content-type": "application/json"}
+            ).text
+            res_arr = self._parser_message(response)
+            if res_arr is not None:
+                embed_result.append(res_arr.tolist())
+            else:
+                embed_result.append(None)
+        return embed_result
 
     def embed_documents(self, texts: List[str]) -> Optional[List[List[float]]]:  # type: ignore[override]
         """Public method to get embeddings for a list of documents.
@@ -145,7 +152,7 @@ def _parse_url(request_url: str) -> Url:
         return u
 
     @staticmethod
-    def _get_body(appid: str, text: List[str]) -> Dict[str, Any]:
+    def _get_body(appid: str, text: dict) -> Dict[str, Any]:
         body = {
             "header": {"app_id": appid, "uid": "39769795890", "status": 3},
             "parameter": {"emb": {"feature": {"encoding": "utf8"}}},