Skip to content

Commit

Permalink
community[patch]: Implement lazy_load() for EverNoteLoader (langchain-ai#18538)
Browse files Browse the repository at this point in the history

Covered by `test_evernote_loader.py`
  • Loading branch information
cbornet authored and Dave Bechberger committed Mar 29, 2024
1 parent 5695707 commit c599948
Showing 1 changed file with 21 additions and 22 deletions.
43 changes: 21 additions & 22 deletions libs/community/langchain_community/document_loaders/evernote.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,33 +40,32 @@ def __init__(self, file_path: str, load_single_document: bool = True):
         self.file_path = file_path
         self.load_single_document = load_single_document

-    def load(self) -> List[Document]:
-        """Load documents from EverNote export file."""
-        documents = [
-            Document(
-                page_content=note["content"],
-                metadata={
-                    **{
-                        key: value
-                        for key, value in note.items()
-                        if key not in ["content", "content-raw", "resource"]
+    def _lazy_load(self) -> Iterator[Document]:
+        for note in self._parse_note_xml(self.file_path):
+            if note.get("content") is not None:
+                yield Document(
+                    page_content=note["content"],
+                    metadata={
+                        **{
+                            key: value
+                            for key, value in note.items()
+                            if key not in ["content", "content-raw", "resource"]
+                        },
+                        **{"source": self.file_path},
                     },
-                    **{"source": self.file_path},
-                },
-            )
-            for note in self._parse_note_xml(self.file_path)
-            if note.get("content") is not None
-        ]
+                )

+    def lazy_load(self) -> Iterator[Document]:
+        """Load documents from EverNote export file."""
         if not self.load_single_document:
-            return documents
-
-        return [
-            Document(
-                page_content="".join([document.page_content for document in documents]),
+            yield from self._lazy_load()
+        else:
+            yield Document(
+                page_content="".join(
+                    [document.page_content for document in self._lazy_load()]
+                ),
                 metadata={"source": self.file_path},
             )
-        ]

     @staticmethod
     def _parse_content(content: str) -> str:
Expand Down

0 comments on commit c599948

Please sign in to comment.