Skip to content

Commit

Permalink
core[patch]: deprecate hwchase17/langchain-hub, address path traversal (
Browse files Browse the repository at this point in the history
langchain-ai#18600)

Deprecates the old langchain-hub repository. Does *not* deprecate the
new hub at https://smith.langchain.com/hub.

@PinkDraconian has correctly pointed out that if someone passes
unsanitized user input to the `try_load_from_hub` function, that input
can be used to load files from GitHub locations outside the
hwchase17/langchain-hub repository.

This PR adds some more path checking to that function and deprecates the
functionality in favor of the hub built into LangSmith.
  • Loading branch information
efriis authored and Dave Bechberger committed Mar 29, 2024
1 parent 1009d43 commit d1b0e2f
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 2 deletions.
16 changes: 15 additions & 1 deletion libs/core/langchain_core/utils/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,28 @@

import requests

from langchain_core._api.deprecation import deprecated

DEFAULT_REF = os.environ.get("LANGCHAIN_HUB_DEFAULT_REF", "master")
LANGCHAINHUB_REPO = "https://raw.githubusercontent.com/hwchase17/langchain-hub/"
URL_BASE = os.environ.get(
"LANGCHAIN_HUB_URL_BASE",
"https://raw.githubusercontent.com/hwchase17/langchain-hub/{ref}/",
LANGCHAINHUB_REPO + "{ref}/",
)
HUB_PATH_RE = re.compile(r"lc(?P<ref>@[^:]+)?://(?P<path>.*)")

T = TypeVar("T")


@deprecated(
since="0.1.30",
removal="0.2",
message=(
"Using the hwchase17/langchain-hub "
"repo for prompts is deprecated. Please use "
"https://smith.langchain.com/hub instead."
),
)
def try_load_from_hub(
path: Union[str, Path],
loader: Callable[[str], T],
Expand All @@ -43,6 +55,8 @@ def try_load_from_hub(
# Instead, use PurePosixPath to ensure that forward slashes are used as the
# path separator, regardless of the operating system.
full_url = urljoin(URL_BASE.format(ref=ref), PurePosixPath(remote_path).__str__())
if not full_url.startswith(LANGCHAINHUB_REPO):
raise ValueError(f"Invalid hub path: {path}")

r = requests.get(full_url, timeout=5)
if r.status_code != 200:
Expand Down
21 changes: 20 additions & 1 deletion libs/core/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions libs/core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
grandalf = "^0.8"
pytest-profiling = "^1.7.0"
responses = "^0.25.0"


[tool.poetry.group.test_integration]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import pytest
import responses

from langchain_core.utils.loading import DEFAULT_REF, URL_BASE, try_load_from_hub


Expand Down Expand Up @@ -94,3 +95,12 @@ def test_failed_request(mocked_responses: responses.RequestsMock) -> None:
with pytest.raises(ValueError, match=re.compile("Could not find file at .*")):
try_load_from_hub(f"lc://{path}", loader, "chains", {"json"})
loader.assert_not_called()


def test_path_traversal() -> None:
    """Test that a path traversal attack is prevented.

    The ``..`` segments would make the joined URL escape the
    hwchase17/langchain-hub repository, so ``try_load_from_hub`` must reject
    the path with its "Invalid hub path" error rather than fetching it.
    """
    path = "lc://chains/../../../../../../../../../it.json"
    loader = Mock()

    # Pin the error message: a bare ``ValueError`` check would also pass on
    # the unrelated "Could not find file at ..." error raised for a failed
    # request, which would hide a regression in the path check.
    with pytest.raises(ValueError, match="Invalid hub path"):
        try_load_from_hub(path, loader, "chains", {"json"})
    # The loader must never run for a rejected path.
    loader.assert_not_called()

0 comments on commit d1b0e2f

Please sign in to comment.