Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

langchain[patch]: make BooleanOutputParser check words not substrings #20064

Merged
merged 2 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 19 additions & 15 deletions libs/langchain/langchain/output_parsers/boolean.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from langchain_core.output_parsers import BaseOutputParser


Expand All @@ -19,24 +21,26 @@ def parse(self, text: str) -> bool:
boolean

"""
cleaned_upper_text = text.strip().upper()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe adding .split() is sufficient to make the required change?

cleaned_upper_text = text.strip().upper().split()

Copy link
Contributor Author

@casperdcl casperdcl Apr 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

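Not really: "I believe so (YES)" would .split() into [..., "(YES)"], and the token "(YES)" still carries its parentheses, so it does not equal "YES". The re.findall call, by contrast, matches on \b word boundaries and returns only the word itself, i.e. [..., "YES"], with the surrounding punctuation dropped.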

if (
self.true_val.upper() in cleaned_upper_text
and self.false_val.upper() in cleaned_upper_text
):
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} in "
f"received: {text}."
truthy = {
val.upper() for val in re.findall(
rf"\b({self.true_val}|{self.false_val})\b",
text,
flags=re.IGNORECASE | re.MULTILINE
)
elif self.true_val.upper() in cleaned_upper_text:
}
if self.true_val.upper() in truthy:
if self.false_val.upper() in truthy:
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} "
f"in received: {text}."
)
return True
elif self.false_val.upper() in cleaned_upper_text:
elif self.false_val.upper() in truthy:
return False
else:
raise ValueError(
f"BooleanOutputParser expected output value to include either "
f"{self.true_val} or {self.false_val}. Received {text}."
)
raise ValueError(
f"BooleanOutputParser expected output value to include either "
f"{self.true_val} or {self.false_val}. Received {text}."
)

@property
def _type(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ def test_boolean_output_parser_parse() -> None:
result = parser.parse("Not relevant (NO)")
assert result is False

# Test valid input
result = parser.parse("NOW this is relevant (YES)")
assert result is True

# Test ambiguous input
try:
parser.parse("yes and no")
Expand Down