Skip to content

Commit

Permalink
langchain[patch]: make BooleanOutputParser check words not substrings (
Browse files Browse the repository at this point in the history
…langchain-ai#20064)

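- **Description**: fixes BooleanOutputParser matching its true/false values as
substrings of other words (e.g. "NOW this is likely (YES)" -> `True`, rather than an "Ambiguous response" `ValueError` caused by the "NO" inside "NOW")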
- **Issue(s)**: fixes langchain-ai#11408 (follow-up to langchain-ai#17810)
- **Dependencies**: None
- **GitHub handle**: @casperdcl

<!-- if unreviewed after a few days, @-mention one of baskaryan, efriis,
eyurtsev, hwchase17 -->

- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.
- [ ] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
  • Loading branch information
2 people authored and junkeon committed Apr 16, 2024
1 parent 5563fd0 commit ef2f561
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 29 deletions.
41 changes: 24 additions & 17 deletions libs/langchain/langchain/output_parsers/boolean.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from langchain_core.output_parsers import BaseOutputParser


Expand All @@ -17,26 +19,31 @@ def parse(self, text: str) -> bool:
Returns:
boolean
"""
cleaned_upper_text = text.strip().upper()
if (
self.true_val.upper() in cleaned_upper_text
and self.false_val.upper() in cleaned_upper_text
):
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} in "
f"received: {text}."
)
elif self.true_val.upper() in cleaned_upper_text:
regexp = rf"\b({self.true_val}|{self.false_val})\b"

truthy = {
val.upper()
for val in re.findall(regexp, text, flags=re.IGNORECASE | re.MULTILINE)
}
if self.true_val.upper() in truthy:
if self.false_val.upper() in truthy:
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} "
f"in received: {text}."
)
return True
elif self.false_val.upper() in cleaned_upper_text:
elif self.false_val.upper() in truthy:
if self.true_val.upper() in truthy:
raise ValueError(
f"Ambiguous response. Both {self.true_val} and {self.false_val} "
f"in received: {text}."
)
return False
else:
raise ValueError(
f"BooleanOutputParser expected output value to include either "
f"{self.true_val} or {self.false_val}. Received {text}."
)
raise ValueError(
f"BooleanOutputParser expected output value to include either "
f"{self.true_val} or {self.false_val}. Received {text}."
)

@property
def _type(self) -> str:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from langchain.output_parsers.boolean import BooleanOutputParser


Expand All @@ -24,16 +26,16 @@ def test_boolean_output_parser_parse() -> None:
result = parser.parse("Not relevant (NO)")
assert result is False

# Test valid input
result = parser.parse("NOW this is relevant (YES)")
assert result is True

# Test ambiguous input
try:
parser.parse("yes and no")
assert False, "Should have raised ValueError"
except ValueError:
pass

# Test invalid input
try:
parser.parse("INVALID")
assert False, "Should have raised ValueError"
except ValueError:
pass
with pytest.raises(ValueError):
parser.parse("YES NO")

with pytest.raises(ValueError):
parser.parse("NO YES")
# Bad input
with pytest.raises(ValueError):
parser.parse("BOOM")

0 comments on commit ef2f561

Please sign in to comment.