Skip to content

Commit

Permalink
community[patch]: Fix sparkllm embeddings api bug. (#19122)
Browse files Browse the repository at this point in the history
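- **Description:** Fix a bug in the SparkLLM embeddings API integration: `_embed` now sends one request per input text (each wrapped as a `user` message) and returns the list of per-text embeddings, instead of posting the whole list of texts in a single request body.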
@baskaryan PTAL
  • Loading branch information
liugddx authored and hinthornw committed Apr 26, 2024
1 parent 42f6d7c commit 1994040
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 35 deletions.
82 changes: 56 additions & 26 deletions docs/docs/integrations/text_embedding/sparkllm.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-15T09:36:13.753824100Z",
"start_time": "2024-03-15T09:36:13.225834400Z"
}
},
"outputs": [],
"source": [
"from langchain_community.embeddings import SparkLLMTextEmbeddings\n",
"\n",
"embeddings = SparkLLMTextEmbeddings(\n",
" spark_app_id=\"sk-*\", spark_api_key=\"\", spark_api_secret=\"\"\n",
" spark_app_id=\"<spark_app_id>\",\n",
" spark_api_key=\"<spark_api_key>\",\n",
" spark_api_secret=\"<spark_api_secret>\",\n",
")"
]
},
Expand All @@ -45,44 +52,67 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-15T09:36:25.436201400Z",
"start_time": "2024-03-15T09:36:25.313456600Z"
}
},
"outputs": [
{
"data": {
"text/plain": "[-0.043609619140625,\n 0.2017822265625,\n 0.0270843505859375,\n -0.250244140625,\n -0.024993896484375,\n -0.0382080078125,\n 0.06207275390625,\n -0.0146331787109375]"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"text_q = \"Introducing iFlytek\"\n",
"\n",
"os.environ[\"SPARK_APP_ID\"] = \"YOUR_APP_ID\"\n",
"os.environ[\"SPARK_API_KEY\"] = \"YOUR_API_KEY\"\n",
"os.environ[\"SPARK_API_SECRET\"] = \"YOUR_API_SECRET\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text_1 = \"iFLYTEK is a well-known intelligent speech and artificial intelligence publicly listed company in the Asia-Pacific Region. Since its establishment, the company is devoted to cornerstone technological research in speech and languages, natural language understanding, machine learning, machine reasoning, adaptive learning, and has maintained the world-leading position in those domains. The company actively promotes the development of A.I. products and their sector-based applications, with visions of enabling machines to listen and speak, understand and think, creating a better world with artificial intelligence.\"\n",
"text_2 = \"iFLYTEK Open Platform was launched in 2010 by iFLYTEK as China’s first Artificial Intelligence open platform for Mobile Internet and intelligent hardware developers.\"\n",
"text_1 = \"Science and Technology Innovation Company Limited, commonly known as iFlytek, is a leading Chinese technology company specializing in speech recognition, natural language processing, and artificial intelligence. With a rich history and remarkable achievements, iFlytek has emerged as a frontrunner in the field of intelligent speech and language technologies.iFlytek has made significant contributions to the advancement of human-computer interaction through its cutting-edge innovations. Their advanced speech recognition technology has not only improved the accuracy and efficiency of voice input systems but has also enabled seamless integration of voice commands into various applications and devices.The company's commitment to research and development has been instrumental in its success. iFlytek invests heavily in fostering talent and collaboration with academic institutions, resulting in groundbreaking advancements in speech synthesis and machine translation. Their dedication to innovation has not only transformed the way we communicate but has also enhanced accessibility for individuals with disabilities.\"\n",
"\n",
"text_2 = \"Moreover, iFlytek's impact extends beyond domestic boundaries, as they actively promote international cooperation and collaboration in the field of artificial intelligence. They have consistently participated in global competitions and contributed to the development of international standards.In recognition of their achievements, iFlytek has received numerous accolades and awards both domestically and internationally. Their contributions have revolutionized the way we interact with technology and have paved the way for a future where voice-based interfaces play a vital role.Overall, iFlytek is a trailblazer in the field of intelligent speech and language technologies, and their commitment to innovation and excellence deserves commendation.\"\n",
"\n",
"query_result = embeddings.embed_query(text_2)\n",
"query_result"
"query_result = embeddings.embed_query(text_q)\n",
"query_result[:8]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-15T09:36:54.657224Z",
"start_time": "2024-03-15T09:36:54.404690400Z"
}
},
"outputs": [
{
"data": {
"text/plain": "[-0.161865234375,\n 0.58984375,\n 0.998046875,\n 0.365966796875,\n 0.72900390625,\n 0.6015625,\n -0.8408203125,\n -0.2666015625]"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_result = embeddings.embed_documents([text_1, text_2])\n",
"doc_result"
"doc_result[0][:8]"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"kernelspec": {
"name": "python3",
"language": "python",
"display_name": "Python 3 (ipykernel)"
}
},
"nbformat": 4,
Expand Down
25 changes: 16 additions & 9 deletions libs/community/langchain_community/embeddings/sparkllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,21 @@ def _embed(self, texts: List[str], host: str) -> Optional[List[List[float]]]:
api_key=self.spark_api_key.get_secret_value(),
api_secret=self.spark_api_secret.get_secret_value(),
)
content = self._get_body(self.spark_app_id.get_secret_value(), texts)
response = requests.post(
url, json=content, headers={"content-type": "application/json"}
).text
res_arr = self._parser_message(response)
if res_arr is not None:
return res_arr.tolist()
return None
embed_result: list = []
for text in texts:
query_context = {"messages": [{"content": text, "role": "user"}]}
content = self._get_body(
self.spark_app_id.get_secret_value(), query_context
)
response = requests.post(
url, json=content, headers={"content-type": "application/json"}
).text
res_arr = self._parser_message(response)
if res_arr is not None:
embed_result.append(res_arr.tolist())
else:
embed_result.append(None)
return embed_result

def embed_documents(self, texts: List[str]) -> Optional[List[List[float]]]: # type: ignore[override]
"""Public method to get embeddings for a list of documents.
Expand Down Expand Up @@ -145,7 +152,7 @@ def _parse_url(request_url: str) -> Url:
return u

@staticmethod
def _get_body(appid: str, text: List[str]) -> Dict[str, Any]:
def _get_body(appid: str, text: dict) -> Dict[str, Any]:
body = {
"header": {"app_id": appid, "uid": "39769795890", "status": 3},
"parameter": {"emb": {"feature": {"encoding": "utf8"}}},
Expand Down

0 comments on commit 1994040

Please sign in to comment.