From 697f9383633baa397f9ecc3f97df274d978bded8 Mon Sep 17 00:00:00 2001 From: Aneesh Agrawal Date: Mon, 21 Dec 2020 16:56:59 -0500 Subject: [PATCH 01/14] Remove unused function `remove_trailing_comma` --- src/black/__init__.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 48690573810..e6575849376 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -1733,14 +1733,6 @@ def comments_after(self, leaf: Leaf) -> List[Leaf]: """Generate comments that should appear directly after `leaf`.""" return self.comments.get(id(leaf), []) - def remove_trailing_comma(self) -> None: - """Remove the trailing comma and moves the comments attached to it.""" - trailing_comma = self.leaves.pop() - trailing_comma_comments = self.comments.pop(id(trailing_comma), []) - self.comments.setdefault(id(self.leaves[-1]), []).extend( - trailing_comma_comments - ) - def is_complex_subscript(self, leaf: Leaf) -> bool: """Return True iff `leaf` is part of a slice with non-trivial exprs.""" open_lsqb = self.bracket_tracker.get_open_lsqb() From 8e412bf1b45966c056219719c25996e920590387 Mon Sep 17 00:00:00 2001 From: Aneesh Agrawal Date: Mon, 21 Dec 2020 16:56:59 -0500 Subject: [PATCH 02/14] Improve multiline string handling --- src/black/__init__.py | 69 ++++- tests/data/composition.py | 4 +- tests/data/composition_no_trailing_comma.py | 4 +- tests/data/multiline_strings.py | 318 ++++++++++++++++++++ tests/test_black.py | 8 + 5 files changed, 392 insertions(+), 11 deletions(-) create mode 100644 tests/data/multiline_strings.py diff --git a/src/black/__init__.py b/src/black/__init__.py index e6575849376..251b3d3dc7b 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -10,6 +10,7 @@ import io import itertools import logging +import math from multiprocessing import Manager, freeze_support import os from pathlib import Path @@ -6458,13 +6459,71 @@ def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> Uses the provided `line_str` rendering, if any, otherwise computes a new one. """ + if not line_str: line_str = line_to_string(line) - return ( - len(line_str) <= line_length - and "\n" not in line_str # multiline strings - and not line.contains_standalone_comments() - ) + if line.contains_standalone_comments(): + return False + if "\n" not in line_str: + # No multi-line strings present + return len(line_str) <= line_length + else: + first, *_, last = line_str.split("\n") + if len(first) > line_length or len(last) > line_length: + return False + + commas: List[int] = [] + multiline_string = None + multiline_string_contexts: List[LN] = [] + + max_level_to_update = math.inf + # TODO: need to take into account bits from BracketTracker re: fixups for for/in, lambdas + for i, leaf in enumerate(line.leaves): + if max_level_to_update == math.inf: + had_comma = None + if leaf.bracket_depth + 1 > len(commas): + commas.append(0) + elif leaf.bracket_depth + 1 < len(commas): + had_comma = commas.pop() + if ( + had_comma is not None + and multiline_string is not None + and multiline_string.bracket_depth == leaf.bracket_depth + 1 + ): + # Have left the level with the MLS, stop tracking commas + max_level_to_update = leaf.bracket_depth + if had_comma > 0: + # MLS was in parens with at least one comma - force split + return False + + if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA: + # Ignore non-nested trailing comma + # directly after MLS/MLS-containing expression + ignore_ctxs: List[Optional[LN]] = [None] + ignore_ctxs += multiline_string_contexts + if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1): + commas[leaf.bracket_depth] += 1 + if max_level_to_update != math.inf: + max_level_to_update = min(max_level_to_update, leaf.bracket_depth) + + if is_multiline_string(leaf): + if len(multiline_string_contexts) > 0: + # >1 multiline string cannot fit on a single line - force split + return False + multiline_string = leaf + ctx: LN = leaf + while str(ctx) in line_str: + multiline_string_contexts.append(ctx) + if ctx.parent is None: + break + ctx = ctx.parent + + # May not have a triple-quoted multiline string at all, + # in case of a regular string with embedded newlines and line continuations + if len(multiline_string_contexts) == 0: + return True + + return all(val == 0 for val in commas) def can_be_split(line: Line) -> bool: diff --git a/tests/data/composition.py b/tests/data/composition.py index e429f15e669..0798d3f3b29 100644 --- a/tests/data/composition.py +++ b/tests/data/composition.py @@ -161,9 +161,7 @@ def tricky_asserts(self) -> None: 8 STORE_ATTR 0 (x) 10 LOAD_CONST 0 (None) 12 RETURN_VALUE - """ % ( - _C.__init__.__code__.co_firstlineno + 1, - ) + """ % (_C.__init__.__code__.co_firstlineno + 1,) assert ( expectedexpectedexpectedexpectedexpectedexpectedexpectedexpectedexpect diff --git a/tests/data/composition_no_trailing_comma.py b/tests/data/composition_no_trailing_comma.py index f17b89dea8d..88d17b743de 100644 --- a/tests/data/composition_no_trailing_comma.py +++ b/tests/data/composition_no_trailing_comma.py @@ -347,9 +347,7 @@ def tricky_asserts(self) -> None: 8 STORE_ATTR 0 (x) 10 LOAD_CONST 0 (None) 12 RETURN_VALUE - """ % ( - _C.__init__.__code__.co_firstlineno + 1, - ) + """ % (_C.__init__.__code__.co_firstlineno + 1,) assert ( expectedexpectedexpectedexpectedexpectedexpectedexpectedexpectedexpect diff --git a/tests/data/multiline_strings.py b/tests/data/multiline_strings.py new file mode 100644 index 00000000000..4947b0974a8 --- /dev/null +++ b/tests/data/multiline_strings.py @@ -0,0 +1,318 @@ +"""cow +say""", +call(3, "dogsay", textwrap.dedent("""dove + coo""" % "cowabunga")) +call(3, textwrap.dedent("""cow + moo""" % "cowabunga"), "dogsay") +call(3, "dogsay", textwrap.dedent("""crow + caw""" % "cowabunga"),) +call(3, textwrap.dedent("""cat + meow""" % "cowabunga"), {"dog", "say"}) +call(3, {"dog", "say"}, textwrap.dedent("""horse + neigh""" % "cowabunga")) +call(3, {"dog", "say"}, textwrap.dedent("""pig + oink""" % "cowabunga"),) +textwrap.dedent("""A one-line triple-quoted string.""") +textwrap.dedent("""A two-line triple-quoted string +since it goes to the next line.""") +textwrap.dedent("""A three-line triple-quoted string +that not only goes to the next line +but also goes one line beyond.""") +textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""") +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""")) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {config_filename} file contents. +""".format("config_filename", config_filename))) +# Another use case +data = yaml.load("""\ +a: 1 +b: 2 +""") +data = yaml.load("""\ +a: 1 +b: 2 +""",) + +MULTILINE = """ +foo +""".replace("\n", "") +generated_readme = lambda project_name: """ +{} + + +""".strip().format(project_name) +parser.usage += """ +Custom extra help summary. + +Extra test: +- with +- bullets +""" + + +def get_stuff(cr, value): + # original + cr.execute(""" + SELECT whatever + FROM some_table t + WHERE id = %s + """, [value]) + return cr.fetchone() + + +def get_stuff(cr, value): + # preferred + cr.execute( + """ + SELECT whatever + FROM some_table t + WHERE id = %s + """, + [value], + ) + return cr.fetchone() + + +call(arg1, arg2, """ +short +""", arg3=True) +test_vectors = [ + "one-liner\n", + "two\nliner\n", + """expressed +as a three line +mulitline string""", +] + +_wat = re.compile( + r""" + regex + """, + re.MULTILINE | re.VERBOSE, +) +dis_c_instance_method = """\ +%3d 0 LOAD_FAST 1 (x) + 2 LOAD_CONST 1 (1) + 4 COMPARE_OP 2 (==) + 6 LOAD_FAST 0 (self) + 8 STORE_ATTR 0 (x) + 10 LOAD_CONST 0 (None) + 12 RETURN_VALUE +""" % (_C.__init__.__code__.co_firstlineno + 1,) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually {verb} the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {file_type} file contents. +""".format(verb="using", file_type="test"))) +{"""cow +moos"""} +["""cow +moos"""] +["""cow +moos""", """dog +woofs +and +barks"""] +def dastardly_default_value( + cow: String = json.loads("""this +is +quite +the +dastadardly +value!"""), + **kwargs, +): + pass +# output +"""cow +say""", +call( + 3, + "dogsay", + textwrap.dedent("""dove + coo""" % "cowabunga"), +) +call( + 3, + textwrap.dedent("""cow + moo""" % "cowabunga"), + "dogsay", +) +call( + 3, + "dogsay", + textwrap.dedent("""crow + caw""" % "cowabunga"), +) +call( + 3, + textwrap.dedent("""cat + meow""" % "cowabunga"), + {"dog", "say"}, +) +call( + 3, + {"dog", "say"}, + textwrap.dedent("""horse + neigh""" % "cowabunga"), +) +call( + 3, + {"dog", "say"}, + textwrap.dedent("""pig + oink""" % "cowabunga"), +) +textwrap.dedent("""A one-line triple-quoted string.""") +textwrap.dedent("""A two-line triple-quoted string +since it goes to the next line.""") +textwrap.dedent("""A three-line triple-quoted string +that not only goes to the next line +but also goes one line beyond.""") +textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""") +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. file contents. +""")) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually leveraging the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {config_filename} file contents. +""".format("config_filename", config_filename))) +# Another use case +data = yaml.load("""\ +a: 1 +b: 2 +""") +data = yaml.load( + """\ +a: 1 +b: 2 +""", +) + +MULTILINE = """ +foo +""".replace("\n", "") +generated_readme = lambda project_name: """ +{} + + +""".strip().format(project_name) +parser.usage += """ +Custom extra help summary. + +Extra test: +- with +- bullets +""" + + +def get_stuff(cr, value): + # original + cr.execute( + """ + SELECT whatever + FROM some_table t + WHERE id = %s + """, + [value], + ) + return cr.fetchone() + + +def get_stuff(cr, value): + # preferred + cr.execute( + """ + SELECT whatever + FROM some_table t + WHERE id = %s + """, + [value], + ) + return cr.fetchone() + + +call( + arg1, + arg2, + """ +short +""", + arg3=True, +) +test_vectors = [ + "one-liner\n", + "two\nliner\n", + """expressed +as a three line +mulitline string""", +] + +_wat = re.compile( + r""" + regex + """, + re.MULTILINE | re.VERBOSE, +) +dis_c_instance_method = """\ +%3d 0 LOAD_FAST 1 (x) + 2 LOAD_CONST 1 (1) + 4 COMPARE_OP 2 (==) + 6 LOAD_FAST 0 (self) + 8 STORE_ATTR 0 (x) + 10 LOAD_CONST 0 (None) + 12 RETURN_VALUE +""" % (_C.__init__.__code__.co_firstlineno + 1,) +path.write_text(textwrap.dedent("""\ + A triple-quoted string + actually {verb} the textwrap.dedent functionality + that ends in a trailing newline, + representing e.g. {file_type} file contents. +""".format(verb="using", file_type="test"))) +{"""cow +moos"""} +["""cow +moos"""] +[ + """cow +moos""", + """dog +woofs +and +barks""", +] + + +def dastardly_default_value( + cow: String = json.loads("""this +is +quite +the +dastadardly +value!"""), + **kwargs, +): + pass diff --git a/tests/test_black.py b/tests/test_black.py index a688c8780ef..f910ed4f94d 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -359,6 +359,14 @@ def test_string_quotes(self) -> None: black.assert_equivalent(source, not_normalized) black.assert_stable(source, not_normalized, mode=mode) + @patch("black.dump_to_file", dump_to_stderr) + def test_multiline_strings(self) -> None: + source, expected = read_data("multiline_strings") + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, DEFAULT_MODE) + @patch("black.dump_to_file", dump_to_stderr) def test_docstring_no_string_normalization(self) -> None: """Like test_docstring but with string normalization off.""" From 00fb92948d2645d5bd08e1fc0c159696207cfa39 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Wed, 26 Oct 2022 12:04:45 -0400 Subject: [PATCH 03/14] move logic under preview --- CHANGES.md | 1 + src/black/linegen.py | 20 ++++++++--------- src/black/lines.py | 22 +++++++++++++------ src/black/mode.py | 1 + tests/data/simple_cases/composition.py | 4 +++- .../composition_no_trailing_comma.py | 4 +++- 6 files changed, 33 insertions(+), 19 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 67451f7caf5..d763fe64661 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,7 @@ - Enforce empty lines before classes and functions with sticky leading comments (#3302) +- TODO multiline string entry ### Configuration diff --git a/src/black/linegen.py b/src/black/linegen.py index a2e41bf5912..b001dd8b01f 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -468,7 +468,7 @@ def transform_line( and not line.should_split_rhs and not line.magic_trailing_comma and ( - is_line_short_enough(line, line_length=mode.line_length, line_str=line_str) + is_line_short_enough(line, mode=mode, line_str=line_str) or line.contains_unsplittable_type_ignore() ) and not (line.inside_brackets and line.contains_standalone_comments()) @@ -493,13 +493,13 @@ def _rhs( """ for omit in generate_trailers_to_omit(line, mode.line_length): lines = list( - right_hand_split(line, mode.line_length, features, omit=omit) + right_hand_split(line, mode, features, omit=omit) ) # Note: this check is only able to figure out if the first line of the # *current* transformation fits in the line length. This is true only # for simple cases. All others require running more transforms via # `transform_line()`. This check doesn't know if those would succeed. - if is_line_short_enough(lines[0], line_length=mode.line_length): + if is_line_short_enough(lines[0], mode=mode): yield from lines return @@ -508,7 +508,7 @@ def _rhs( # reported as not fitting a single line, as well as lines that contain # trailing commas (those have to be exploded). yield from right_hand_split( - line, line_length=mode.line_length, features=features + line, mode, features=features ) # HACK: nested functions (like _rhs) compiled by mypyc don't retain their @@ -602,7 +602,7 @@ def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator def right_hand_split( line: Line, - line_length: int, + mode: Mode, features: Collection[Feature] = (), omit: Collection[LeafID] = (), ) -> Iterator[Line]: @@ -657,17 +657,17 @@ def right_hand_split( # there are no standalone comments in the body and not body.contains_standalone_comments(0) # and we can actually remove the parens - and can_omit_invisible_parens(body, line_length) + and can_omit_invisible_parens(body, mode.line_length) ): omit = {id(closing_bracket), *omit} try: - yield from right_hand_split(line, line_length, features=features, omit=omit) + yield from right_hand_split(line, mode, features=features, omit=omit) return except CannotSplit as e: if not ( can_be_split(body) - or is_line_short_enough(body, line_length=line_length) + or is_line_short_enough(body, mode=mode) ): raise CannotSplit( "Splitting failed, body is still too long and can't be split." @@ -1272,7 +1272,7 @@ def run_transformer( or line.contains_multiline_strings() or result[0].contains_uncollapsable_type_comments() or result[0].contains_unsplittable_type_ignore() - or is_line_short_enough(result[0], line_length=mode.line_length) + or is_line_short_enough(result[0], mode=mode) # If any leaves have no parents (which _can_ occur since # `transform(line)` potentially destroys the line's underlying node # structure), then we can't proceed. Doing so would cause the below @@ -1288,7 +1288,7 @@ def run_transformer( line_copy, transform, mode, features_fop, line_str=line_str ) if all( - is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion + is_line_short_enough(ln, mode=mode) for ln in second_opinion ): result = second_opinion return result diff --git a/src/black/lines.py b/src/black/lines.py index 923b8104763..a8b2dd7a0f8 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -35,6 +35,7 @@ from blib2to3.pgen2 import token from blib2to3.pytree import Leaf, Node +# types T = TypeVar("T") Index = int LeafID = int @@ -710,21 +711,28 @@ def append_leaves( new_line.append(comment_leaf, preformatted=True) -def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool: +def is_line_short_enough(line: Line, *, mode: Mode, line_str: str = "") -> bool: """Return True if `line` is no longer than `line_length`. - Uses the provided `line_str` rendering, if any, otherwise computes a new one. """ if not line_str: line_str = line_to_string(line) + + if Preview.multiline_string_handling not in mode: + return ( + len(line_str) <= mode.line_length + and "\n" not in line_str # multiline strings + and not line.contains_standalone_comments() + ) + if line.contains_standalone_comments(): return False if "\n" not in line_str: # No multi-line strings present - return len(line_str) <= line_length + return len(line_str) <= mode.line_length else: first, *_, last = line_str.split("\n") - if len(first) > line_length or len(last) > line_length: + if len(first) > mode.line_length or len(last) > mode.line_length: return False commas: List[int] = [] @@ -740,9 +748,9 @@ def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> elif leaf.bracket_depth + 1 < len(commas): had_comma = commas.pop() if ( - had_comma is not None - and multiline_string is not None - and multiline_string.bracket_depth == leaf.bracket_depth + 1 + had_comma is not None + and multiline_string is not None + and multiline_string.bracket_depth == leaf.bracket_depth + 1 ): # Have left the level with the MLS, stop tracking commas max_level_to_update = leaf.bracket_depth diff --git a/src/black/mode.py b/src/black/mode.py index 1e83f2a9c6d..b69178f4569 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -152,6 +152,7 @@ class Preview(Enum): annotation_parens = auto() empty_lines_before_class_or_def_with_leading_comments = auto() long_docstring_quotes_on_newline = auto() + multiline_string_handling = auto() normalize_docstring_quotes_and_prefixes_properly = auto() one_element_subscript = auto() remove_block_trailing_newline = auto() diff --git a/tests/data/simple_cases/composition.py b/tests/data/simple_cases/composition.py index 0798d3f3b29..e429f15e669 100644 --- a/tests/data/simple_cases/composition.py +++ b/tests/data/simple_cases/composition.py @@ -161,7 +161,9 @@ def tricky_asserts(self) -> None: 8 STORE_ATTR 0 (x) 10 LOAD_CONST 0 (None) 12 RETURN_VALUE - """ % (_C.__init__.__code__.co_firstlineno + 1,) + """ % ( + _C.__init__.__code__.co_firstlineno + 1, + ) assert ( expectedexpectedexpectedexpectedexpectedexpectedexpectedexpectedexpect diff --git a/tests/data/simple_cases/composition_no_trailing_comma.py b/tests/data/simple_cases/composition_no_trailing_comma.py index 88d17b743de..f17b89dea8d 100644 --- a/tests/data/simple_cases/composition_no_trailing_comma.py +++ b/tests/data/simple_cases/composition_no_trailing_comma.py @@ -347,7 +347,9 @@ def tricky_asserts(self) -> None: 8 STORE_ATTR 0 (x) 10 LOAD_CONST 0 (None) 12 RETURN_VALUE - """ % (_C.__init__.__code__.co_firstlineno + 1,) + """ % ( + _C.__init__.__code__.co_firstlineno + 1, + ) assert ( expectedexpectedexpectedexpectedexpectedexpectedexpectedexpectedexpect From ed3dab09e0a69d7b80bc2dd40e8b91eeae6463e4 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Wed, 26 Oct 2022 12:11:04 -0400 Subject: [PATCH 04/14] undo random newline removal --- tests/test_black.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_black.py b/tests/test_black.py index feeba98ba0f..5d0175d9d66 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -2196,6 +2196,7 @@ def tracefunc( frame: types.FrameType, event: str, arg: Any ) -> Callable[[types.FrameType, str, Any], Any]: """Show function calls `from black/__init__.py` as they happen. + Register this with `sys.settrace()` in a test you're debugging. """ if event != "call": From b46e9979a9121bd93dfe50ae63f2f8539c899fc4 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Wed, 26 Oct 2022 16:33:49 -0400 Subject: [PATCH 05/14] fix/add a few comments --- src/black/lines.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/black/lines.py b/src/black/lines.py index a8b2dd7a0f8..77197713f12 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -711,7 +711,9 @@ def append_leaves( new_line.append(comment_leaf, preformatted=True) -def is_line_short_enough(line: Line, *, mode: Mode, line_str: str = "") -> bool: +def is_line_short_enough( # noqa: C901 + line: Line, *, mode: Mode, line_str: str = "" +) -> bool: """Return True if `line` is no longer than `line_length`. Uses the provided `line_str` rendering, if any, otherwise computes a new one. """ @@ -728,14 +730,14 @@ def is_line_short_enough(line: Line, *, mode: Mode, line_str: str = "") -> bool: if line.contains_standalone_comments(): return False if "\n" not in line_str: - # No multi-line strings present + # No multiline strings (MLS) present return len(line_str) <= mode.line_length else: first, *_, last = line_str.split("\n") if len(first) > mode.line_length or len(last) > mode.line_length: return False - commas: List[int] = [] + commas: List[int] = [] # tracks number of commas per depth level multiline_string: Optional[Leaf] = None multiline_string_contexts: List[LN] = [] From a6f9f358075a1d336efac0f2e519c45740950193 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Mon, 31 Oct 2022 12:33:26 -0400 Subject: [PATCH 06/14] fix self lint --- src/black/linegen.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/black/linegen.py b/src/black/linegen.py index b001dd8b01f..0682e02367f 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -492,9 +492,7 @@ def _rhs( bracket pair instead. """ for omit in generate_trailers_to_omit(line, mode.line_length): - lines = list( - right_hand_split(line, mode, features, omit=omit) - ) + lines = list(right_hand_split(line, mode, features, omit=omit)) # Note: this check is only able to figure out if the first line of the # *current* transformation fits in the line length. This is true only # for simple cases. All others require running more transforms via @@ -507,9 +505,7 @@ def _rhs( # This mostly happens to multiline strings that are by definition # reported as not fitting a single line, as well as lines that contain # trailing commas (those have to be exploded). - yield from right_hand_split( - line, mode, features=features - ) + yield from right_hand_split(line, mode, features=features) # HACK: nested functions (like _rhs) compiled by mypyc don't retain their # __name__ attribute which is needed in `run_transformer` further down. @@ -665,10 +661,7 @@ def right_hand_split( return except CannotSplit as e: - if not ( - can_be_split(body) - or is_line_short_enough(body, mode=mode) - ): + if not (can_be_split(body) or is_line_short_enough(body, mode=mode)): raise CannotSplit( "Splitting failed, body is still too long and can't be split." ) from e @@ -1287,8 +1280,6 @@ def run_transformer( second_opinion = run_transformer( line_copy, transform, mode, features_fop, line_str=line_str ) - if all( - is_line_short_enough(ln, mode=mode) for ln in second_opinion - ): + if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion): result = second_opinion return result From f8ddbfbf998411dbdc2dd5bd1abba21dbf13d8d6 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Wed, 2 Nov 2022 17:27:12 -0400 Subject: [PATCH 07/14] add documentation --- CHANGES.md | 2 +- docs/the_black_code_style/future_style.md | 48 +++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index e26c9c9f97c..00836ca1b6d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,7 +17,7 @@ - Enforce empty lines before classes and functions with sticky leading comments (#3302) - Implicitly concatenated strings used as function args are now wrapped inside parentheses (#3307) -- TODO multiline string entry +- Improve handling of multiline strings by changing line split behavior (#1879) ### Configuration diff --git a/docs/the_black_code_style/future_style.md b/docs/the_black_code_style/future_style.md index 17b7eef092f..8f45ea3b7e3 100644 --- a/docs/the_black_code_style/future_style.md +++ b/docs/the_black_code_style/future_style.md @@ -152,3 +152,51 @@ with open("bla.txt") as f, open("x"): async def main(): await asyncio.sleep(1) ``` + +### Improved multiline string handling + +_Black_ is smarter when formatting multiline strings, especially in function arguments, +to avoid introducing extra line breaks. Previously, it would always consider multiline +strings as not fitting on a single line. With this new feature, _Black_ looks at the +context around the multiline string to decide if it should be inlined or split to a +separate line. For example, when a multiline string is passed to a function, _Black_ +will only split the multiline string if a line is too long or if multiple arguments are +being passed. + +For example, _Black_ will reformat + +```python +textwrap.dedent( + """\ + This is a + multiline string +""" +) +``` + +to: + +```python +textwrap.dedent("""\ + This is a + multiline string +""") +``` + +And: + +```python +MULTILINE = """ +foobar +""".replace( + "\n", "" +) +``` + +to: + +```python +MULTILINE = """ +foobar +""".replace("\n", "") +``` From c20506d9f66019a7ddc6eacd79b907f81ce7f134 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Wed, 2 Nov 2022 17:41:35 -0400 Subject: [PATCH 08/14] also update comment --- src/black/lines.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/black/lines.py b/src/black/lines.py index 77197713f12..951d327b57a 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -714,7 +714,9 @@ def append_leaves( def is_line_short_enough( # noqa: C901 line: Line, *, mode: Mode, line_str: str = "" ) -> bool: - """Return True if `line` is no longer than `line_length`. + """For non-multiline strings, return True if `line` is no longer than `line_length`. + For multiline strings, looks at the context around `line` to determine + if it should be inlined or split up. Uses the provided `line_str` rendering, if any, otherwise computes a new one. """ if not line_str: From c1d7679e31e816fc4cc6b872ac5bd485adc79a78 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Wed, 21 Dec 2022 19:23:08 -0500 Subject: [PATCH 09/14] f-string test case --- tests/data/preview/multiline_strings.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/data/preview/multiline_strings.py b/tests/data/preview/multiline_strings.py index 4947b0974a8..3f7b431d920 100644 --- a/tests/data/preview/multiline_strings.py +++ b/tests/data/preview/multiline_strings.py @@ -137,6 +137,17 @@ def dastardly_default_value( **kwargs, ): pass + +print(f""" + This {animal} + moos and barks +{animal} say +""") +msg = f"""The arguments {bad_arguments} were passed in. +Please use `--build-option` instead, +`--global-option` is reserved to flags like `--verbose` or `--quiet`. +""" + # output """cow say""", @@ -316,3 +327,14 @@ def dastardly_default_value( **kwargs, ): pass + + +print(f""" + This {animal} + moos and barks +{animal} say +""") +msg = f"""The arguments {bad_arguments} were passed in. +Please use `--build-option` instead, +`--global-option` is reserved to flags like `--verbose` or `--quiet`. +""" From 9ce0e9b6031c50ab43b0ef7403c3a24a09b7f730 Mon Sep 17 00:00:00 2001 From: Olivia Hong <24500729+olivia-hong@users.noreply.github.com> Date: Wed, 21 Dec 2022 20:20:12 -0500 Subject: [PATCH 10/14] fix changes.md merge --- CHANGES.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 37087eff9fe..0082d91c7fb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,9 +16,6 @@ -- Enforce empty lines before classes and functions with sticky leading comments (#3302) -- Implicitly concatenated strings used as function args are now wrapped inside - parentheses (#3307) - Fix a crash in preview style with assert + parenthesized string (#3415) - Fix crashes in preview style with walrus operators used in function return annotations and except clauses (#3423) From a6a68277b37ac5f3ffa79b2f6a09e17036cc37f1 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Tue, 7 Feb 2023 19:51:12 -0500 Subject: [PATCH 11/14] address review (dedent, more test cases, add more comments) --- src/black/lines.py | 114 +++++++++++++----------- tests/data/preview/multiline_strings.py | 18 ++++ 2 files changed, 79 insertions(+), 53 deletions(-) diff --git a/src/black/lines.py b/src/black/lines.py index 0ec6f0e75d5..be5dfa29c1d 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -737,62 +737,70 @@ def is_line_short_enough( # noqa: C901 if "\n" not in line_str: # No multiline strings (MLS) present return len(line_str) <= mode.line_length - else: - first, *_, last = line_str.split("\n") - if len(first) > mode.line_length or len(last) > mode.line_length: - return False - commas: List[int] = [] # tracks number of commas per depth level - multiline_string: Optional[Leaf] = None - multiline_string_contexts: List[LN] = [] - - max_level_to_update = math.inf - for i, leaf in enumerate(line.leaves): - if max_level_to_update == math.inf: - had_comma: Optional[int] = None - if leaf.bracket_depth + 1 > len(commas): - commas.append(0) - elif leaf.bracket_depth + 1 < len(commas): - had_comma = commas.pop() - if ( - had_comma is not None - and multiline_string is not None - and multiline_string.bracket_depth == leaf.bracket_depth + 1 - ): - # Have left the level with the MLS, stop tracking commas - max_level_to_update = leaf.bracket_depth - if had_comma > 0: - # MLS was in parens with at least one comma - force split - return False - - if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA: - # Ignore non-nested trailing comma - # directly after MLS/MLS-containing expression - ignore_ctxs: List[Optional[LN]] = [None] - ignore_ctxs += multiline_string_contexts - if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1): - commas[leaf.bracket_depth] += 1 - if max_level_to_update != math.inf: - max_level_to_update = min(max_level_to_update, leaf.bracket_depth) - - if is_multiline_string(leaf): - if len(multiline_string_contexts) > 0: - # >1 multiline string cannot fit on a single line - force split + first, *_, last = line_str.split("\n") + if len(first) > mode.line_length or len(last) > mode.line_length: + return False + + # Traverse the AST to examine the context around the multiline string (MLS), + # tracking aspects such as depth and comma existence, + # to determine whether to split the MLS or keep it together + # Depth (which is based on the existing bracket depth concept) + # is needed to determine nesting level of the MLS + # Include special cases for e.g. trailing commas and long line length + commas: List[int] = [] # tracks number of commas per depth level + multiline_string: Optional[Leaf] = None + # store the leaves that contain parts of the MLS + multiline_string_contexts: List[LN] = [] + + max_level_to_update = math.inf # track the depth of the MLS + for i, leaf in enumerate(line.leaves): + if max_level_to_update == math.inf: + had_comma: Optional[int] = None + if leaf.bracket_depth + 1 > len(commas): + commas.append(0) + elif leaf.bracket_depth + 1 < len(commas): + had_comma = commas.pop() + if ( + had_comma is not None + and multiline_string is not None + and multiline_string.bracket_depth == leaf.bracket_depth + 1 + ): + # Have left the level with the MLS, stop tracking commas + max_level_to_update = leaf.bracket_depth + if had_comma > 0: + # MLS was in parens with at least one comma - force split return False - multiline_string = leaf - ctx: LN = leaf - while str(ctx) in line_str: - multiline_string_contexts.append(ctx) - if ctx.parent is None: - break - ctx = ctx.parent - - # May not have a triple-quoted multiline string at all, - # in case of a regular string with embedded newlines and line continuations - if len(multiline_string_contexts) == 0: - return True - return all(val == 0 for val in commas) + if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA: + # Ignore non-nested trailing comma + # directly after MLS/MLS-containing expression + ignore_ctxs: List[Optional[LN]] = [None] + ignore_ctxs += multiline_string_contexts + if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1): + commas[leaf.bracket_depth] += 1 + if max_level_to_update != math.inf: + max_level_to_update = min(max_level_to_update, leaf.bracket_depth) + + if is_multiline_string(leaf): + if len(multiline_string_contexts) > 0: + # >1 multiline string cannot fit on a single line - force split + return False + multiline_string = leaf + ctx: LN = leaf + # fetch the tree structure around the MLS in the AST + while str(ctx) in line_str: + multiline_string_contexts.append(ctx) + if ctx.parent is None: + break + ctx = ctx.parent + + # May not have a triple-quoted multiline string at all, + # in case of a regular string with embedded newlines and line continuations + if len(multiline_string_contexts) == 0: + return True + + return all(val == 0 for val in commas) def can_be_split(line: Line) -> bool: diff --git a/tests/data/preview/multiline_strings.py b/tests/data/preview/multiline_strings.py index 3f7b431d920..bb517d128e2 100644 --- a/tests/data/preview/multiline_strings.py +++ b/tests/data/preview/multiline_strings.py @@ -2,6 +2,8 @@ say""", call(3, "dogsay", textwrap.dedent("""dove coo""" % "cowabunga")) +call(3, "dogsay", textwrap.dedent("""dove +coo""" % "cowabunga")) call(3, textwrap.dedent("""cow moo""" % "cowabunga"), "dogsay") call(3, "dogsay", textwrap.dedent("""crow @@ -45,6 +47,12 @@ a: 1 b: 2 """,) +data = yaml.load( + """\ + a: 1 + b: 2 +""" +) MULTILINE = """ foo @@ -157,6 +165,12 @@ def dastardly_default_value( textwrap.dedent("""dove coo""" % "cowabunga"), ) +call( + 3, + "dogsay", + textwrap.dedent("""dove +coo""" % "cowabunga"), +) call( 3, textwrap.dedent("""cow @@ -222,6 +236,10 @@ def dastardly_default_value( b: 2 """, ) +data = yaml.load("""\ + a: 1 + b: 2 +""") MULTILINE = """ foo From acb54c354e9dcb4194e4c89c1c2ac9693f287013 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Tue, 7 Feb 2023 20:22:29 -0500 Subject: [PATCH 12/14] tweak comments more --- src/black/lines.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/black/lines.py b/src/black/lines.py index 932c365b5c8..1a6652683b1 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -732,12 +732,12 @@ def is_line_short_enough( # noqa: C901 if len(first) > mode.line_length or len(last) > mode.line_length: return False - # Traverse the AST to examine the context around the multiline string (MLS), + # Traverse the AST to examine the context of the multiline string (MLS), # tracking aspects such as depth and comma existence, # to determine whether to split the MLS or keep it together # Depth (which is based on the existing bracket depth concept) # is needed to determine nesting level of the MLS - # Include special cases for e.g. trailing commas and long line length + # Includes special case for trailing commas commas: List[int] = [] # tracks number of commas per depth level multiline_string: Optional[Leaf] = None # store the leaves that contain parts of the MLS @@ -778,7 +778,7 @@ def is_line_short_enough( # noqa: C901 return False multiline_string = leaf ctx: LN = leaf - # fetch the tree structure around the MLS in the AST + # fetch the leaf components of the MLS in the AST while str(ctx) in line_str: multiline_string_contexts.append(ctx) if ctx.parent is None: From 909580cb62dd1bf5c07ddd6057f1d0fc08fe68d5 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Tue, 7 Feb 2023 20:38:23 -0500 Subject: [PATCH 13/14] punctuation --- src/black/lines.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/black/lines.py b/src/black/lines.py index 1a6652683b1..ed182bab124 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -734,10 +734,10 @@ def is_line_short_enough( # noqa: C901 # Traverse the AST to examine the context of the multiline string (MLS), # tracking aspects such as depth and comma existence, - # to determine whether to split the MLS or keep it together - # Depth (which is based on the existing bracket depth concept) - # is needed to determine nesting level of the MLS - # Includes special case for trailing commas + # to determine whether to split the MLS or keep it together. + # Depth (which is based on the existing bracket_depth concept) + # is needed to determine nesting level of the MLS. + # Includes special case for trailing commas. commas: List[int] = [] # tracks number of commas per depth level multiline_string: Optional[Leaf] = None # store the leaves that contain parts of the MLS From b2f76377ac988da45890f7397f1846c1f8ae5c66 Mon Sep 17 00:00:00 2001 From: Olivia Hong Date: Wed, 8 Feb 2023 13:13:15 -0500 Subject: [PATCH 14/14] add missing import --- src/black/lines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/lines.py b/src/black/lines.py index ed182bab124..b65604864a4 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -16,7 +16,7 @@ ) from black.brackets import DOT_PRIORITY, BracketTracker -from black.mode import Mode +from black.mode import Mode, Preview from black.nodes import ( BRACKETS, CLOSING_BRACKETS,