Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

langchain[patch]: make BooleanOutputParser check words not substrings #20064

Merged
merged 2 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
41 changes: 24 additions & 17 deletions libs/langchain/langchain/output_parsers/boolean.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from langchain_core.output_parsers import BaseOutputParser


Expand All @@ -17,26 +19,31 @@ def parse(self, text: str) -> bool:

Returns:
boolean

"""
cleaned_upper_text = text.strip().upper()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't adding `.split()` be sufficient to make the required change?

cleaned_upper_text = text.strip().upper().split()

Copy link
Contributor Author

@casperdcl casperdcl Apr 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

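Not really: "I believe so (YES)" will `.split()` into [..., "(YES)"], which still carries the parentheses, so an exact match against "YES" fails. The `re.findall` with `\b` word boundaries, by contrast, drops the surrounding punctuation, i.e. it yields [..., "YES"].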

if (
self.true_val.upper() in cleaned_upper_text
and self.false_val.upper() in cleaned_upper_text
):
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} in "
f"received: {text}."
)
elif self.true_val.upper() in cleaned_upper_text:
regexp = rf"\b({self.true_val}|{self.false_val})\b"

truthy = {
val.upper()
for val in re.findall(regexp, text, flags=re.IGNORECASE | re.MULTILINE)
}
if self.true_val.upper() in truthy:
if self.false_val.upper() in truthy:
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} "
f"in received: {text}."
)
return True
elif self.false_val.upper() in cleaned_upper_text:
elif self.false_val.upper() in truthy:
if self.true_val.upper() in truthy:
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} "
f"in received: {text}."
)
Comment on lines +37 to +41
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

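This branch will never be executed: it sits inside the `elif`, which is only reached when `self.true_val.upper()` is not in `truthy`, so the inner check for `self.true_val.upper() in truthy` can never be true.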

Suggested change
if self.true_val.upper() in truthy:
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} "
f"in received: {text}."
)

return False
else:
raise ValueError(
f"BooleanOutputParser expected output value to include either "
f"{self.true_val} or {self.false_val}. Received {text}."
)
raise ValueError(
f"BooleanOutputParser expected output value to include either "
f"{self.true_val} or {self.false_val}. Received {text}."
)

@property
def _type(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from langchain.output_parsers.boolean import BooleanOutputParser


Expand All @@ -24,16 +26,16 @@ def test_boolean_output_parser_parse() -> None:
result = parser.parse("Not relevant (NO)")
assert result is False

# Test valid input
result = parser.parse("NOW this is relevant (YES)")
assert result is True

# Test ambiguous input
try:
parser.parse("yes and no")
assert False, "Should have raised ValueError"
except ValueError:
pass

# Test invalid input
try:
parser.parse("INVALID")
assert False, "Should have raised ValueError"
except ValueError:
pass
with pytest.raises(ValueError):
parser.parse("YES NO")

with pytest.raises(ValueError):
parser.parse("NO YES")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add missing unit test to catch the other branch

# Bad input
with pytest.raises(ValueError):
parser.parse("BOOM")