Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve multiline string handling #1879

Merged
merged 18 commits into from Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -133,6 +133,7 @@ versions separately.
code. Implicitly concatenated f-strings with different quotes can now be merged or
quote-normalized by changing the quotes used in expressions. (#3509)
- Fix crash on `await (yield)` when Black is compiled with mypyc (#3533)
- Improve handling of multiline strings by changing line split behavior (#1879)

### Configuration

Expand Down
48 changes: 48 additions & 0 deletions docs/the_black_code_style/future_style.md
Expand Up @@ -111,3 +111,51 @@ my_dict = {
"another key": short_value,
}
```

### Improved multiline string handling

_Black_ is smarter when formatting multiline strings, especially in function arguments,
to avoid introducing extra line breaks. Previously, it would always consider multiline
strings as not fitting on a single line. With this new feature, _Black_ looks at the
context around the multiline string to decide if it should be inlined or split to a
separate line. In particular, when a multiline string is passed to a function, _Black_
will only move the multiline string onto its own line if the line would otherwise be
too long or if multiple arguments are being passed.

For example, _Black_ will reformat

```python
textwrap.dedent(
    """\
This is a
multiline string
"""
)
```

to:

```python
textwrap.dedent("""\
This is a
multiline string
""")
```

And:

```python
MULTILINE = """
foobar
""".replace(
    "\n", ""
)
```

to:

```python
MULTILINE = """
foobar
""".replace("\n", "")
```
43 changes: 19 additions & 24 deletions src/black/linegen.py
Expand Up @@ -2,7 +2,7 @@
Generating lines of code.
"""
import sys
from dataclasses import dataclass
from dataclasses import dataclass, replace
from enum import Enum, auto
from functools import partial, wraps
from typing import Collection, Iterator, List, Optional, Set, Union, cast
Expand Down Expand Up @@ -505,7 +505,7 @@ def transform_line(
and not line.should_split_rhs
and not line.magic_trailing_comma
and (
is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
is_line_short_enough(line, mode=mode, line_str=line_str)
or line.contains_unsplittable_type_ignore()
)
and not (line.inside_brackets and line.contains_standalone_comments())
Expand All @@ -529,24 +529,20 @@ def _rhs(
bracket pair instead.
"""
for omit in generate_trailers_to_omit(line, mode.line_length):
lines = list(
right_hand_split(line, mode.line_length, features, omit=omit)
)
lines = list(right_hand_split(line, mode, features, omit=omit))
# Note: this check is only able to figure out if the first line of the
# *current* transformation fits in the line length. This is true only
# for simple cases. All others require running more transforms via
# `transform_line()`. This check doesn't know if those would succeed.
if is_line_short_enough(lines[0], line_length=mode.line_length):
if is_line_short_enough(lines[0], mode=mode):
yield from lines
return

# All splits failed, best effort split with no omits.
# This mostly happens to multiline strings that are by definition
# reported as not fitting a single line, as well as lines that contain
# trailing commas (those have to be exploded).
yield from right_hand_split(
line, line_length=mode.line_length, features=features
)
yield from right_hand_split(line, mode, features=features)

# HACK: nested functions (like _rhs) compiled by mypyc don't retain their
# __name__ attribute which is needed in `run_transformer` further down.
Expand Down Expand Up @@ -664,7 +660,7 @@ class _RHSResult:

def right_hand_split(
line: Line,
line_length: int,
mode: Mode,
features: Collection[Feature] = (),
omit: Collection[LeafID] = (),
) -> Iterator[Line]:
Expand All @@ -678,7 +674,7 @@ def right_hand_split(
"""
rhs_result = _first_right_hand_split(line, omit=omit)
yield from _maybe_split_omitting_optional_parens(
rhs_result, line, line_length, features=features, omit=omit
rhs_result, line, mode, features=features, omit=omit
)


Expand Down Expand Up @@ -733,7 +729,7 @@ def _first_right_hand_split(
def _maybe_split_omitting_optional_parens(
rhs: _RHSResult,
line: Line,
line_length: int,
mode: Mode,
features: Collection[Feature] = (),
omit: Collection[LeafID] = (),
) -> Iterator[Line]:
Expand All @@ -751,7 +747,7 @@ def _maybe_split_omitting_optional_parens(
# there are no standalone comments in the body
and not rhs.body.contains_standalone_comments(0)
# and we can actually remove the parens
and can_omit_invisible_parens(rhs.body, line_length)
and can_omit_invisible_parens(rhs.body, mode.line_length)
):
omit = {id(rhs.closing_bracket), *omit}
try:
Expand All @@ -766,23 +762,24 @@ def _maybe_split_omitting_optional_parens(
and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
# the left side of assignment is short enough (the -1 is for the ending
# optional paren)
and is_line_short_enough(rhs.head, line_length=line_length - 1)
and is_line_short_enough(
rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
)
# the left side of assignment won't explode further because of magic
# trailing comma
and rhs.head.magic_trailing_comma is None
# the split by omitting optional parens isn't preferred by some other
# reason
and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
and not _prefer_split_rhs_oop(rhs_oop, mode)
):
yield from _maybe_split_omitting_optional_parens(
rhs_oop, line, line_length, features=features, omit=omit
rhs_oop, line, mode, features=features, omit=omit
)
return

except CannotSplit as e:
if not (
can_be_split(rhs.body)
or is_line_short_enough(rhs.body, line_length=line_length)
can_be_split(rhs.body) or is_line_short_enough(rhs.body, mode=mode)
):
raise CannotSplit(
"Splitting failed, body is still too long and can't be split."
Expand All @@ -806,7 +803,7 @@ def _maybe_split_omitting_optional_parens(
yield result


def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
def _prefer_split_rhs_oop(rhs_oop: _RHSResult, mode: Mode) -> bool:
"""
Returns whether we should prefer the result from a split omitting optional parens.
"""
Expand All @@ -826,7 +823,7 @@ def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
# the first line still contains the `=`)
any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
# the first line is short enough
and is_line_short_enough(rhs_oop.head, line_length=line_length)
and is_line_short_enough(rhs_oop.head, mode=mode)
)
# contains unsplittable type ignore
or rhs_oop.head.contains_unsplittable_type_ignore()
Expand Down Expand Up @@ -1525,7 +1522,7 @@ def run_transformer(
or line.contains_multiline_strings()
or result[0].contains_uncollapsable_type_comments()
or result[0].contains_unsplittable_type_ignore()
or is_line_short_enough(result[0], line_length=mode.line_length)
or is_line_short_enough(result[0], mode=mode)
# If any leaves have no parents (which _can_ occur since
# `transform(line)` potentially destroys the line's underlying node
# structure), then we can't proceed. Doing so would cause the below
Expand All @@ -1540,8 +1537,6 @@ def run_transformer(
second_opinion = run_transformer(
line_copy, transform, mode, features_fop, line_str=line_str
)
if all(
is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
):
if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion):
result = second_opinion
return result
96 changes: 87 additions & 9 deletions src/black/lines.py
@@ -1,4 +1,5 @@
import itertools
import math
import sys
from dataclasses import dataclass, field
from typing import (
Expand All @@ -10,11 +11,12 @@
Sequence,
Tuple,
TypeVar,
Union,
cast,
)

from black.brackets import DOT_PRIORITY, BracketTracker
from black.mode import Mode
from black.mode import Mode, Preview
from black.nodes import (
BRACKETS,
CLOSING_BRACKETS,
Expand All @@ -37,6 +39,7 @@
T = TypeVar("T")
Index = int
LeafID = int
LN = Union[Leaf, Node]


@dataclass
Expand Down Expand Up @@ -701,18 +704,93 @@ def append_leaves(
new_line.append(comment_leaf, preformatted=True)


def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
"""Return True if `line` is no longer than `line_length`.

def is_line_short_enough( # noqa: C901
line: Line, *, mode: Mode, line_str: str = ""
) -> bool:
"""For non-multiline strings, return True if `line` is no longer than `line_length`.
For multiline strings, looks at the context around `line` to determine
if it should be inlined or split up.
Uses the provided `line_str` rendering, if any, otherwise computes a new one.
"""
# Render the line only when the caller did not supply a rendering.
if not line_str:
line_str = line_to_string(line)
# NOTE(review): the `return (...)` just below reads the bare name `line_length`,
# which is not a parameter of the signature above; it looks like the removed
# side of the diff rather than live code - confirm before relying on it.
return (
len(line_str) <= line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)

# Without the preview feature: fits only if within mode.line_length,
# contains no newline at all, and has no standalone comments.
if Preview.multiline_string_handling not in mode:
return (
len(line_str) <= mode.line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)

# Preview behavior from here on.
if line.contains_standalone_comments():
return False
if "\n" not in line_str:
# No multiline strings (MLS) present
return len(line_str) <= mode.line_length

# Only the first and last physical lines of the rendering are measured
# against the limit; the lines in between are not checked.
first, *_, last = line_str.split("\n")
if len(first) > mode.line_length or len(last) > mode.line_length:
return False

# Traverse the AST to examine the context of the multiline string (MLS),
# tracking aspects such as depth and comma existence,
# to determine whether to split the MLS or keep it together.
# Depth (which is based on the existing bracket_depth concept)
# is needed to determine nesting level of the MLS.
# Includes special case for trailing commas.
commas: List[int] = [] # tracks number of commas per depth level
multiline_string: Optional[Leaf] = None
# Holds the MLS leaf itself plus each enclosing node whose string form is
# still contained in line_str (filled in when the MLS leaf is reached below).
multiline_string_contexts: List[LN] = []

# Stays at infinity until the bracket level holding the MLS has been left;
# after that it is lowered to the shallowest bracket depth seen.
max_level_to_update = math.inf # track the depth of the MLS
for i, leaf in enumerate(line.leaves):
if max_level_to_update == math.inf:
had_comma: Optional[int] = None
# Keep one counter per bracket depth: add a counter when this leaf is
# deeper than what is tracked, drop the innermost one when it is shallower.
if leaf.bracket_depth + 1 > len(commas):
commas.append(0)
elif leaf.bracket_depth + 1 < len(commas):
had_comma = commas.pop()
if (
had_comma is not None
and multiline_string is not None
and multiline_string.bracket_depth == leaf.bracket_depth + 1
):
# Have left the level with the MLS, stop tracking commas
max_level_to_update = leaf.bracket_depth
if had_comma > 0:
# MLS was in parens with at least one comma - force split
return False

if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
# Ignore non-nested trailing comma
# directly after MLS/MLS-containing expression
# (i.e. the comma is the last leaf of the line and its previous sibling
# is either absent or one of the recorded MLS contexts).
ignore_ctxs: List[Optional[LN]] = [None]
ignore_ctxs += multiline_string_contexts
if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
commas[leaf.bracket_depth] += 1
if max_level_to_update != math.inf:
max_level_to_update = min(max_level_to_update, leaf.bracket_depth)

if is_multiline_string(leaf):
if len(multiline_string_contexts) > 0:
# >1 multiline string cannot fit on a single line - force split
return False
multiline_string = leaf
ctx: LN = leaf
# Climb from the MLS leaf through its parents, recording each one whose
# string form is a substring of line_str; stop at the first that is not,
# or at the root.
while str(ctx) in line_str:
multiline_string_contexts.append(ctx)
if ctx.parent is None:
break
ctx = ctx.parent

# May not have a triple-quoted multiline string at all,
# in case of a regular string with embedded newlines and line continuations
if len(multiline_string_contexts) == 0:
return True

# Fits only if no counted comma remains at any tracked depth.
return all(val == 0 for val in commas)


def can_be_split(line: Line) -> bool:
Expand Down
1 change: 1 addition & 0 deletions src/black/mode.py
Expand Up @@ -155,6 +155,7 @@ class Preview(Enum):

add_trailing_comma_consistently = auto()
hex_codes_in_unicode_sequences = auto()
multiline_string_handling = auto()
prefer_splitting_right_hand_side_of_assignments = auto()
# NOTE: string_processing requires wrap_long_dict_values_in_parens
# for https://github.com/psf/black/issues/3117 to be fixed.
Expand Down