astral-sh · MichaReiser · Feb 14, 2024 · Feb 14, 2024 · Feb 14, 2024
@@ -0,0 +1,212 @@
+use std::iter::FusedIterator;
+
+use memchr::memchr2;
+
+use ruff_python_ast::{
+    self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
+    StringLiteral,
+};
+use ruff_source_file::Locator;
+use ruff_text_size::{Ranged, TextLen, TextRange};
+
+use crate::expression::expr_f_string::f_string_quoting;
+use crate::other::f_string::FormatFString;
+use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
+use crate::prelude::*;
+use crate::string::{Quoting, StringPrefix, StringQuotes};
+
+/// Represents any kind of string expression. This could be either a string,
+/// bytes or f-string.
+///
+/// Every variant only borrows the underlying AST node, which is why the
+/// wrapper itself is `Copy`.
+#[derive(Copy, Clone, Debug)]
+pub(crate) enum AnyString<'a> {
+    /// A string literal expression.
+    String(&'a ExprStringLiteral),
+    /// A bytes literal expression.
+    Bytes(&'a ExprBytesLiteral),
+    /// An f-string expression.
+    FString(&'a ExprFString),
+}
+
+impl<'a> AnyString<'a> {
+    /// Wraps `expression` in an [`AnyString`] when it is a string-like literal.
+    ///
+    /// Yields `None` for every [`Expr`] that is neither a string literal, a bytes
+    /// literal nor an f-string.
+    pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
+        let any_string = match expression {
+            Expr::StringLiteral(literal) => AnyString::String(literal),
+            Expr::BytesLiteral(literal) => AnyString::Bytes(literal),
+            Expr::FString(f_string) => AnyString::FString(f_string),
+            _ => return None,
+        };
+
+        Some(any_string)
+    }
+
+    /// Tells whether this string is made up of implicitly concatenated parts.
+    pub(crate) fn is_implicit_concatenated(self) -> bool {
+        match self {
+            Self::String(literal) => literal.value.is_implicit_concatenated(),
+            Self::Bytes(literal) => literal.value.is_implicit_concatenated(),
+            Self::FString(f_string) => f_string.value.is_implicit_concatenated(),
+        }
+    }
+
+    /// Determines the [`Quoting`] to apply to this string.
+    ///
+    /// F-strings defer the decision to [`f_string_quoting`]; string and bytes
+    /// literals always report [`Quoting::CanChange`].
+    pub(super) fn quoting(self, locator: &Locator<'_>) -> Quoting {
+        match self {
+            Self::FString(f_string) => f_string_quoting(f_string, locator),
+            Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
+        }
+    }
+
+    /// Returns an iterator over the [`AnyStringPart`]s that make up this string.
+    ///
+    /// `quoting` is only carried along for f-strings, where it is attached to
+    /// every part the iterator yields.
+    pub(super) fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
+        match self {
+            Self::String(literal) => AnyStringPartsIter::String(literal.value.iter()),
+            Self::Bytes(literal) => AnyStringPartsIter::Bytes(literal.value.iter()),
+            Self::FString(f_string) => AnyStringPartsIter::FString(f_string.value.iter(), quoting),
+        }
+    }
+
+    /// Tells whether the string spans multiple lines in `source`.
+    ///
+    /// String and bytes literals only count as multiline when the quotes parsed
+    /// right after the prefix are triple quotes and the text contains a line
+    /// break. An f-string counts as soon as its text contains a line break.
+    pub(crate) fn is_multiline(self, source: &str) -> bool {
+        let has_line_break = |text: &str| memchr2(b'\n', b'\r', text.as_bytes()).is_some();
+
+        match self {
+            AnyString::FString(f_string) => has_line_break(&source[f_string.range]),
+            AnyString::String(_) | AnyString::Bytes(_) => {
+                let contents = &source[self.range()];
+                let prefix = StringPrefix::parse(contents);
+                // Skip the prefix so that the quotes are at the start of the parsed text.
+                let after_prefix = TextRange::new(prefix.text_len(), contents.text_len());
+                let quotes = StringQuotes::parse(&contents[after_prefix]);
+
+                quotes.is_some_and(StringQuotes::is_triple) && has_line_break(contents)
+            }
+        }
+    }
+}
+
+impl Ranged for AnyString<'_> {
+    /// Returns the source range of the wrapped expression node.
+    fn range(&self) -> TextRange {
+        match *self {
+            Self::String(literal) => literal.range(),
+            Self::Bytes(literal) => literal.range(),
+            Self::FString(f_string) => f_string.range(),
+        }
+    }
+}
+
+impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
+    /// Converts the borrowed string into the [`AnyNodeRef`] variant of the same kind.
+    fn from(value: &AnyString<'a>) -> Self {
+        match *value {
+            AnyString::String(literal) => Self::ExprStringLiteral(literal),
+            AnyString::Bytes(literal) => Self::ExprBytesLiteral(literal),
+            AnyString::FString(f_string) => Self::ExprFString(f_string),
+        }
+    }
+}
+
+impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
+    /// By-value counterpart of the conversion from `&AnyString`; it simply delegates to it.
+    fn from(value: AnyString<'a>) -> Self {
+        (&value).into()
+    }
+}
+
+impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
+    /// Converts the borrowed string into the [`ExpressionRef`] variant of the same kind.
+    fn from(value: &AnyString<'a>) -> Self {
+        match *value {
+            AnyString::String(literal) => Self::StringLiteral(literal),
+            AnyString::Bytes(literal) => Self::BytesLiteral(literal),
+            AnyString::FString(f_string) => Self::FString(f_string),
+        }
+    }
+}
+
+/// Iterator over the parts of an [`AnyString`], yielding [`AnyStringPart`]s.
+///
+/// Each variant wraps the slice iterator over the parts of the matching
+/// string kind.
+pub(super) enum AnyStringPartsIter<'a> {
+    /// Iterates over the literals of a string expression.
+    String(std::slice::Iter<'a, StringLiteral>),
+    /// Iterates over the literals of a bytes expression.
+    Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
+    /// Iterates over the parts of an f-string expression. The [`Quoting`] is
+    /// copied into every part that is yielded.
+    FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
+}
+
+impl<'a> Iterator for AnyStringPartsIter<'a> {
+    type Item = AnyStringPart<'a>;
+
+    /// Advances the wrapped slice iterator and converts the yielded element
+    /// into the matching [`AnyStringPart`].
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            Self::String(literals) => literals.next().map(|part| AnyStringPart::String {
+                part,
+                layout: StringLiteralKind::String,
+            }),
+            Self::Bytes(literals) => literals.next().map(AnyStringPart::Bytes),
+            Self::FString(f_string_parts, quoting) => {
+                let quoting = *quoting;
+                f_string_parts.next().map(|part| match part {
+                    // A plain literal inside an f-string is still formatted as a
+                    // string literal, but with a layout that remembers the quoting.
+                    ast::FStringPart::Literal(part) => AnyStringPart::String {
+                        part,
+                        layout: StringLiteralKind::InImplicitlyConcatenatedFString(quoting),
+                    },
+                    ast::FStringPart::FString(part) => AnyStringPart::FString { part, quoting },
+                })
+            }
+        }
+    }
+}
+
+// Every variant wraps a `std::slice::Iter`, which keeps returning `None` once it
+// is exhausted, and `next` returns `None` whenever the wrapped iterator does.
+impl FusedIterator for AnyStringPartsIter<'_> {}
+
+/// Represents any kind of string which is part of an implicitly concatenated
+/// string. This could be either a string, bytes or f-string.
+///
+/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
+#[derive(Clone, Debug)]
+pub(super) enum AnyStringPart<'a> {
+    /// A string literal part. This is also used for the plain literal parts of
+    /// an f-string expression.
+    String {
+        /// The string literal node.
+        part: &'a ast::StringLiteral,
+        /// The layout handed to [`FormatStringLiteral`] when formatting the part.
+        layout: StringLiteralKind,
+    },
+    /// A bytes literal part.
+    Bytes(&'a ast::BytesLiteral),
+    /// An f-string part.
+    FString {
+        /// The f-string node.
+        part: &'a ast::FString,
+        /// The quoting handed to [`FormatFString`] when formatting the part.
+        quoting: Quoting,
+    },
+}
+
+impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
+    /// Converts the part into the [`AnyNodeRef`] variant of the node it wraps.
+    fn from(value: &AnyStringPart<'a>) -> Self {
+        match *value {
+            AnyStringPart::String { part: literal, .. } => Self::StringLiteral(literal),
+            AnyStringPart::Bytes(literal) => Self::BytesLiteral(literal),
+            AnyStringPart::FString { part: f_string, .. } => Self::FString(f_string),
+        }
+    }
+}
+
+impl Ranged for AnyStringPart<'_> {
+    /// Returns the source range of the wrapped part node.
+    fn range(&self) -> TextRange {
+        match *self {
+            Self::String { part: literal, .. } => literal.range(),
+            Self::Bytes(literal) => literal.range(),
+            Self::FString { part: f_string, .. } => f_string.range(),
+        }
+    }
+}
+
+impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
+    /// Formats the part by delegating to the formatter that matches its kind.
+    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
+        match *self {
+            Self::String { part, layout } => FormatStringLiteral::new(part, layout).fmt(f),
+            Self::Bytes(part) => part.format().fmt(f),
+            Self::FString { part, quoting } => FormatFString::new(part, quoting).fmt(f),
+        }
+    }
+}
@@ -109,7 +109,7 @@ use super::{NormalizedString, QuoteChar};
 /// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings
 /// that use spaces for alignment.
 pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
-    let docstring = &normalized.text;
+    let docstring = &normalized.text();
 
     // Black doesn't change the indentation of docstrings that contain an escaped newline
     if contains_unescaped_newline(docstring) {
@@ -125,7 +125,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
     let mut lines = docstring.split('\n').peekable();
 
     // Start the string
-    write!(f, [normalized.prefix, normalized.quotes])?;
+    write!(f, [normalized.prefix(), normalized.quotes()])?;
     // We track where in the source docstring we are (in source code byte offsets)
     let mut offset = normalized.start();
 
@@ -141,7 +141,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
 
     // Edge case: The first line is `""" "content`, so we need to insert a chaperone space that
     // keeps inner quotes and closing quotes from getting too close, to avoid `""""content`
-    if trim_both.starts_with(normalized.quotes.quote_char.as_char()) {
+    if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
         space().fmt(f)?;
     }
 
@@ -168,7 +168,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
         {
             space().fmt(f)?;
         }
-        normalized.quotes.fmt(f)?;
+        normalized.quotes().fmt(f)?;
         return Ok(());
     }
 
@@ -194,7 +194,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
         offset,
         stripped_indentation,
         already_normalized,
-        quote_char: normalized.quotes.quote_char,
+        quote_char: normalized.quotes().quote_char,
         code_example: CodeExample::default(),
     }
     .add_iter(lines)?;
@@ -207,7 +207,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
         space().fmt(f)?;
     }
 
-    write!(f, [normalized.quotes])
+    write!(f, [normalized.quotes()])
 }
 
 fn contains_unescaped_newline(haystack: &str) -> bool {
@@ -1569,7 +1569,7 @@ fn docstring_format_source(
 /// that avoids `content""""` and `content\"""`. This only applies to un-escaped backslashes,
 /// so `content\\ """` doesn't need a space while `content\\\ """` does.
 fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
-    trim_end.ends_with(normalized.quotes.quote_char.as_char())
+    trim_end.ends_with(normalized.quotes().quote_char.as_char())
         || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
 }