psf · JelleZijlstra · Mar 7, 2023 · Dec 21, 2020 · Dec 21, 2020 · Oct 26, 2022
diff --git a/CHANGES.md b/CHANGES.md
@@ -32,6 +32,7 @@
   string lambda values are now wrapped in parentheses (#3440)
 - Exclude string type annotations from improved string processing; fix crash when the
   return type annotation is stringified and spans across multiple lines (#3462)
+- Improve handling of multiline strings by changing line split behavior (#1879)
 
 ### Configuration
 

diff --git a/docs/the_black_code_style/future_style.md b/docs/the_black_code_style/future_style.md
@@ -152,3 +152,51 @@ with open("bla.txt") as f, open("x"):
 async def main():
     await asyncio.sleep(1)
 ```
+
+### Improved multiline string handling
+
+_Black_ is smarter when formatting multiline strings, especially in function arguments,
+to avoid introducing extra line breaks. Previously, it would always consider multiline
+strings as not fitting on a single line. With this new feature, _Black_ looks at the
+context around the multiline string to decide whether it should be inlined or split onto
+a separate line. In particular, when a multiline string is passed to a function, _Black_
+will only split the multiline string if a line is too long or if multiple arguments are
+being passed.
+
+For example, _Black_ will reformat
+
+```python
+textwrap.dedent(
+    """\
+    This is a
+    multiline string
+"""
+)
+```
+
+to:
+
+```python
+textwrap.dedent("""\
+    This is a
+    multiline string
+""")
+```
+
+And:
+
+```python
+MULTILINE = """
+foobar
+""".replace(
+    "\n", ""
+)
+```
+
+to:
+
+```python
+MULTILINE = """
+foobar
+""".replace("\n", "")
+```
diff --git a/src/black/linegen.py b/src/black/linegen.py
@@ -2,7 +2,7 @@
 Generating lines of code.
 """
 import sys
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from enum import Enum, auto
 from functools import partial, wraps
 from typing import Collection, Iterator, List, Optional, Set, Union, cast
@@ -492,7 +492,7 @@ def transform_line(
         and not line.should_split_rhs
         and not line.magic_trailing_comma
         and (
-            is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
+            is_line_short_enough(line, mode=mode, line_str=line_str)
             or line.contains_unsplittable_type_ignore()
         )
         and not (line.inside_brackets and line.contains_standalone_comments())
@@ -516,24 +516,20 @@ def _rhs(
             bracket pair instead.
             """
             for omit in generate_trailers_to_omit(line, mode.line_length):
-                lines = list(
-                    right_hand_split(line, mode.line_length, features, omit=omit)
-                )
+                lines = list(right_hand_split(line, mode, features, omit=omit))
                 # Note: this check is only able to figure out if the first line of the
                 # *current* transformation fits in the line length.  This is true only
                 # for simple cases.  All others require running more transforms via
                 # `transform_line()`.  This check doesn't know if those would succeed.
-                if is_line_short_enough(lines[0], line_length=mode.line_length):
+                if is_line_short_enough(lines[0], mode=mode):
                     yield from lines
                     return
 
             # All splits failed, best effort split with no omits.
             # This mostly happens to multiline strings that are by definition
             # reported as not fitting a single line, as well as lines that contain
             # trailing commas (those have to be exploded).
-            yield from right_hand_split(
-                line, line_length=mode.line_length, features=features
-            )
+            yield from right_hand_split(line, mode, features=features)
 
         # HACK: nested functions (like _rhs) compiled by mypyc don't retain their
         # __name__ attribute which is needed in `run_transformer` further down.
@@ -649,7 +645,7 @@ class _RHSResult:
 
 def right_hand_split(
     line: Line,
-    line_length: int,
+    mode: Mode,
     features: Collection[Feature] = (),
     omit: Collection[LeafID] = (),
 ) -> Iterator[Line]:
@@ -663,7 +659,7 @@ def right_hand_split(
     """
     rhs_result = _first_right_hand_split(line, omit=omit)
     yield from _maybe_split_omitting_optional_parens(
-        rhs_result, line, line_length, features=features, omit=omit
+        rhs_result, line, mode, features=features, omit=omit
     )
 
 
@@ -718,7 +714,7 @@ def _first_right_hand_split(
 def _maybe_split_omitting_optional_parens(
     rhs: _RHSResult,
     line: Line,
-    line_length: int,
+    mode: Mode,
     features: Collection[Feature] = (),
     omit: Collection[LeafID] = (),
 ) -> Iterator[Line]:
@@ -736,7 +732,7 @@ def _maybe_split_omitting_optional_parens(
         # there are no standalone comments in the body
         and not rhs.body.contains_standalone_comments(0)
         # and we can actually remove the parens
-        and can_omit_invisible_parens(rhs.body, line_length)
+        and can_omit_invisible_parens(rhs.body, mode.line_length)
     ):
         omit = {id(rhs.closing_bracket), *omit}
         try:
@@ -751,23 +747,24 @@ def _maybe_split_omitting_optional_parens(
                 and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
                 # the left side of assignment is short enough (the -1 is for the ending
                 # optional paren)
-                and is_line_short_enough(rhs.head, line_length=line_length - 1)
+                and is_line_short_enough(
+                    rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
+                )
                 # the left side of assignment won't explode further because of magic
                 # trailing comma
                 and rhs.head.magic_trailing_comma is None
                 # the split by omitting optional parens isn't preferred by some other
                 # reason
-                and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
+                and not _prefer_split_rhs_oop(rhs_oop, mode)
             ):
                 yield from _maybe_split_omitting_optional_parens(
-                    rhs_oop, line, line_length, features=features, omit=omit
+                    rhs_oop, line, mode, features=features, omit=omit
                 )
                 return
 
         except CannotSplit as e:
             if not (
-                can_be_split(rhs.body)
-                or is_line_short_enough(rhs.body, line_length=line_length)
+                can_be_split(rhs.body) or is_line_short_enough(rhs.body, mode=mode)
             ):
                 raise CannotSplit(
                     "Splitting failed, body is still too long and can't be split."
@@ -791,7 +788,7 @@ def _maybe_split_omitting_optional_parens(
             yield result
 
 
-def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
+def _prefer_split_rhs_oop(rhs_oop: _RHSResult, mode: Mode) -> bool:
     """
     Returns whether we should prefer the result from a split omitting optional parens.
     """
@@ -811,7 +808,7 @@ def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
             # the first line still contains the `=`)
             any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
             # the first line is short enough
-            and is_line_short_enough(rhs_oop.head, line_length=line_length)
+            and is_line_short_enough(rhs_oop.head, mode=mode)
         )
         # contains unsplittable type ignore
         or rhs_oop.head.contains_unsplittable_type_ignore()
@@ -1428,7 +1425,7 @@ def run_transformer(
         or line.contains_multiline_strings()
         or result[0].contains_uncollapsable_type_comments()
         or result[0].contains_unsplittable_type_ignore()
-        or is_line_short_enough(result[0], line_length=mode.line_length)
+        or is_line_short_enough(result[0], mode=mode)
         # If any leaves have no parents (which _can_ occur since
         # `transform(line)` potentially destroys the line's underlying node
         # structure), then we can't proceed. Doing so would cause the below
@@ -1443,8 +1440,6 @@ def run_transformer(
     second_opinion = run_transformer(
         line_copy, transform, mode, features_fop, line_str=line_str
     )
-    if all(
-        is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
-    ):
+    if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion):
         result = second_opinion
     return result
diff --git a/src/black/lines.py b/src/black/lines.py
@@ -1,4 +1,5 @@
 import itertools
+import math
 import sys
 from dataclasses import dataclass, field
 from typing import (
@@ -10,6 +11,7 @@
     Sequence,
     Tuple,
     TypeVar,
+    Union,
     cast,
 )
 
@@ -37,6 +39,7 @@
 T = TypeVar("T")
 Index = int
 LeafID = int
+LN = Union[Leaf, Node]
 
 
 @dataclass
@@ -711,18 +714,85 @@ def append_leaves(
             new_line.append(comment_leaf, preformatted=True)
 
 
-def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
-    """Return True if `line` is no longer than `line_length`.
-
+def is_line_short_enough(  # noqa: C901
+    line: Line, *, mode: Mode, line_str: str = ""
+) -> bool:
+    """For non-multiline strings, return True if `line` is no longer than `line_length`.
+    For multiline strings, looks at the context around `line` to determine
+    if it should be inlined or split up.
     Uses the provided `line_str` rendering, if any, otherwise computes a new one.
     """
     if not line_str:
         line_str = line_to_string(line)
-    return (
-        len(line_str) <= line_length
-        and "\n" not in line_str  # multiline strings
-        and not line.contains_standalone_comments()
-    )
+
+    if Preview.multiline_string_handling not in mode:
+        return (
+            len(line_str) <= mode.line_length
+            and "\n" not in line_str  # multiline strings
+            and not line.contains_standalone_comments()
+        )
+
+    if line.contains_standalone_comments():
+        return False
+    if "\n" not in line_str:
+        # No multiline strings (MLS) present
+        return len(line_str) <= mode.line_length
+    else:
+        first, *_, last = line_str.split("\n")
+        if len(first) > mode.line_length or len(last) > mode.line_length:
+            return False
+
+        commas: List[int] = []  # tracks number of commas per depth level
+        multiline_string: Optional[Leaf] = None
+        multiline_string_contexts: List[LN] = []
+
+        max_level_to_update = math.inf
+        for i, leaf in enumerate(line.leaves):
+            if max_level_to_update == math.inf:
+                had_comma: Optional[int] = None
+                if leaf.bracket_depth + 1 > len(commas):
+                    commas.append(0)
+                elif leaf.bracket_depth + 1 < len(commas):
+                    had_comma = commas.pop()
+                if (
+                    had_comma is not None
+                    and multiline_string is not None
+                    and multiline_string.bracket_depth == leaf.bracket_depth + 1
+                ):
+                    # Have left the level with the MLS, stop tracking commas
+                    max_level_to_update = leaf.bracket_depth
+                    if had_comma > 0:
+                        # MLS was in parens with at least one comma - force split
+                        return False
+
+            if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
+                # Ignore non-nested trailing comma
+                # directly after MLS/MLS-containing expression
+                ignore_ctxs: List[Optional[LN]] = [None]
+                ignore_ctxs += multiline_string_contexts
+                if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
+                    commas[leaf.bracket_depth] += 1
+            if max_level_to_update != math.inf:
+                max_level_to_update = min(max_level_to_update, leaf.bracket_depth)
+
+            if is_multiline_string(leaf):
+                if len(multiline_string_contexts) > 0:
+                    # >1 multiline string cannot fit on a single line - force split
+                    return False
+                multiline_string = leaf
+                ctx: LN = leaf
+                while str(ctx) in line_str:
+                    multiline_string_contexts.append(ctx)
+                    if ctx.parent is None:
+                        break
+                    ctx = ctx.parent
+
+        # May not have a triple-quoted multiline string at all,
+        # in case of a regular string with embedded newlines and line continuations
+        if len(multiline_string_contexts) == 0:
+            return True
+
+        return all(val == 0 for val in commas)
 
 
 def can_be_split(line: Line) -> bool:

diff --git a/src/black/mode.py b/src/black/mode.py
@@ -153,6 +153,7 @@ class Preview(Enum):
     empty_lines_before_class_or_def_with_leading_comments = auto()
     handle_trailing_commas_in_head = auto()
     long_docstring_quotes_on_newline = auto()
+    multiline_string_handling = auto()
     normalize_docstring_quotes_and_prefixes_properly = auto()
     one_element_subscript = auto()
     prefer_splitting_right_hand_side_of_assignments = auto()