Skip to content

Commit

Permalink
openai[patch]: remove numpy dep (langchain-ai#18034)
Browse files (browse the repository at this point in the history)
  • Loading branch information
efriis authored and al1p-R committed Feb 27, 2024
1 parent 04bbfea commit 6e618d8
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 115 deletions.
44 changes: 36 additions & 8 deletions libs/partners/openai/langchain_openai/embeddings/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,7 +18,6 @@
cast,
)

import numpy as np
import openai
import tiktoken
from langchain_core.embeddings import Embeddings
Expand Down Expand Up @@ -209,9 +208,11 @@ def validate_environment(cls, values: Dict) -> Dict:
"please use the `AzureOpenAIEmbeddings` class."
)
client_params = {
"api_key": values["openai_api_key"].get_secret_value()
if values["openai_api_key"]
else None,
"api_key": (
values["openai_api_key"].get_secret_value()
if values["openai_api_key"]
else None
),
"organization": values["openai_organization"],
"base_url": values["openai_api_base"],
"timeout": values["request_timeout"],
Expand Down Expand Up @@ -346,8 +347,22 @@ def _get_len_safe_embeddings(
average_embedded = average_embedded.model_dump()
average = average_embedded["data"][0]["embedding"]
else:
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
embeddings[i] = (average / np.linalg.norm(average)).tolist()
# pure-Python weighted average; equivalent (up to float rounding) to
# average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
total_weight = sum(num_tokens_in_batch[i])
average = [
sum(
val * weight
for val, weight in zip(embedding, num_tokens_in_batch[i])
)
/ total_weight
for embedding in zip(*_result)
]

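# pure-Python L2 normalization; note that a zero-magnitude vector raises
# ZeroDivisionError here instead of yielding NaNs. Otherwise same as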
# embeddings[i] = (average / np.linalg.norm(average)).tolist()
magnitude = sum(val**2 for val in average) ** 0.5
embeddings[i] = [val / magnitude for val in average]

return embeddings

Expand Down Expand Up @@ -456,8 +471,21 @@ async def _aget_len_safe_embeddings(
average_embedded = average_embedded.model_dump()
average = average_embedded["data"][0]["embedding"]
else:
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
embeddings[i] = (average / np.linalg.norm(average)).tolist()
# pure-Python weighted average; equivalent (up to float rounding) to
# average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
total_weight = sum(num_tokens_in_batch[i])
average = [
sum(
val * weight
for val, weight in zip(embedding, num_tokens_in_batch[i])
)
/ total_weight
for embedding in zip(*_result)
]
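# pure-Python L2 normalization; note that a zero-magnitude vector raises
# ZeroDivisionError here instead of yielding NaNs. Otherwise same as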
# embeddings[i] = (average / np.linalg.norm(average)).tolist()
magnitude = sum(val**2 for val in average) ** 0.5
embeddings[i] = [val / magnitude for val in average]

return embeddings

Expand Down

0 comments on commit 6e618d8

Please sign in to comment.