Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community[minor]: Implement lazy_load() for WikipediaLoader #18680

Merged
merged 1 commit into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional
from typing import Iterator, Optional

from langchain_core.documents import Document

Expand Down Expand Up @@ -42,12 +42,12 @@ def __init__(
self.load_all_available_meta = load_all_available_meta
self.doc_content_chars_max = doc_content_chars_max

def load(self) -> List[Document]:
def lazy_load(self) -> Iterator[Document]:
"""
Loads the query result from Wikipedia into a list of Documents.

Returns:
List[Document]: A list of Document objects representing the loaded
A list of Document objects representing the loaded
Wikipedia pages.
"""
client = WikipediaAPIWrapper(
Expand All @@ -56,5 +56,4 @@ def load(self) -> List[Document]:
load_all_available_meta=self.load_all_available_meta,
doc_content_chars_max=self.doc_content_chars_max,
)
docs = client.load(self.query)
return docs
yield from client.load(self.query)
16 changes: 12 additions & 4 deletions libs/community/langchain_community/utilities/wikipedia.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Util that calls Wikipedia."""
import logging
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Iterator, List, Optional

from langchain_core.documents import Document
from langchain_core.pydantic_v1 import BaseModel, root_validator
Expand Down Expand Up @@ -104,13 +104,21 @@ def load(self, query: str) -> List[Document]:

Returns: a list of documents.

"""
return list(self.lazy_load(query))

def lazy_load(self, query: str) -> Iterator[Document]:
    """
    Run a Wikipedia search and lazily yield the matching articles.

    This is a generator: nothing runs until the result is iterated, and
    each page is fetched and converted only when the consumer asks for
    the next item.

    Args:
        query: Search text. Only the first ``WIKIPEDIA_MAX_QUERY_LENGTH``
            characters are sent to the search call.

    Yields:
        One Document per search hit, considering at most
        ``self.top_k_results`` page titles. A title is skipped when
        ``self._fetch_page`` returns a falsy value for it, or when
        ``self._page_to_document`` returns a falsy value for the page.
    """
    page_titles = self.wiki_client.search(
        query[:WIKIPEDIA_MAX_QUERY_LENGTH], results=self.top_k_results
    )
    # Slice again after the search: the cap is enforced here regardless of
    # how many titles the search call actually returned.
    for page_title in page_titles[: self.top_k_results]:
        if wiki_page := self._fetch_page(page_title):
            if doc := self._page_to_document(page_title, wiki_page):
                yield doc