Skip to content

Commit

Permalink
Performance: Token Type Utility Analyzer: Avoid allocations (#6785)
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-strecker-sonarsource committed Mar 1, 2023
1 parent 59ce476 commit 2eb0a0c
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 116 deletions.
Expand Up @@ -25,12 +25,15 @@ public class TokenTypeAnalyzer : TokenTypeAnalyzerBase<SyntaxKind>
{
protected override ILanguageFacade<SyntaxKind> Language { get; } = CSharpFacade.Instance;

protected override TokenClassifierBase GetTokenClassifier(SyntaxToken token, SemanticModel semanticModel, bool skipIdentifierTokens) =>
new TokenClassifier(token, semanticModel, skipIdentifierTokens);
/// <summary>
/// Creates the C# token classifier. The classifier is no longer bound to a single token; tokens are passed to it one at a time.
/// </summary>
/// <param name="semanticModel">Semantic model handed to the classifier.</param>
/// <param name="skipIdentifierTokens">Forwarded to the classifier as its skip-identifiers flag.</param>
protected override TokenClassifierBase GetTokenClassifier(SemanticModel semanticModel, bool skipIdentifierTokens)
{
    return new TokenClassifier(semanticModel, skipIdentifierTokens);
}

/// <summary>
/// Creates the C# trivia classifier.
/// </summary>
protected override TriviaClassifierBase GetTriviaClassifier()
{
    return new TriviaClassifier();
}

private sealed class TokenClassifier : TokenClassifierBase
{
public TokenClassifier(SyntaxToken token, SemanticModel semanticModel, bool skipIdentifiers) : base(token, semanticModel, skipIdentifiers) { }
public TokenClassifier(SemanticModel semanticModel, bool skipIdentifiers) : base(semanticModel, skipIdentifiers) { }

protected override SyntaxNode GetBindableParent(SyntaxToken token) =>
token.GetBindableParent();
Expand All @@ -41,9 +44,6 @@ private sealed class TokenClassifier : TokenClassifierBase
protected override bool IsKeyword(SyntaxToken token) =>
SyntaxFacts.IsKeywordKind(token.Kind());

protected override bool IsRegularComment(SyntaxTrivia trivia) =>
trivia.IsAnyKind(SyntaxKind.SingleLineCommentTrivia, SyntaxKind.MultiLineCommentTrivia);

protected override bool IsNumericLiteral(SyntaxToken token) =>
token.IsKind(SyntaxKind.NumericLiteralToken);

Expand All @@ -63,6 +63,12 @@ private sealed class TokenClassifier : TokenClassifierBase
SyntaxKind.InterpolatedStringTextToken,
SyntaxKind.InterpolatedStringEndToken,
SyntaxKindEx.InterpolatedRawStringEndToken);
}

private sealed class TriviaClassifier : TriviaClassifierBase
{
/// <summary>
/// Returns true when the trivia is a single-line or a multi-line comment.
/// </summary>
protected override bool IsRegularComment(SyntaxTrivia trivia) =>
    trivia.IsKind(SyntaxKind.SingleLineCommentTrivia)
    || trivia.IsKind(SyntaxKind.MultiLineCommentTrivia);

/// <summary>
/// Returns true when the trivia is a single-line or a multi-line documentation comment.
/// </summary>
protected override bool IsDocComment(SyntaxTrivia trivia) =>
    trivia.IsKind(SyntaxKind.SingleLineDocumentationCommentTrivia)
    || trivia.IsKind(SyntaxKind.MultiLineDocumentationCommentTrivia);
Expand Down
Expand Up @@ -20,6 +20,7 @@

using Microsoft.CodeAnalysis.Text;
using SonarAnalyzer.Protobuf;
using static SonarAnalyzer.Protobuf.TokenTypeInfo.Types;

namespace SonarAnalyzer.Rules
{
Expand All @@ -34,36 +35,62 @@ public abstract class TokenTypeAnalyzerBase<TSyntaxKind> : UtilityAnalyzerBase<T

protected TokenTypeAnalyzerBase() : base(DiagnosticId, Title) { }

protected abstract TokenClassifierBase GetTokenClassifier(SyntaxToken token, SemanticModel semanticModel, bool skipIdentifierTokens);
protected abstract TokenClassifierBase GetTokenClassifier(SemanticModel semanticModel, bool skipIdentifierTokens);
protected abstract TriviaClassifierBase GetTriviaClassifier();

protected sealed override TokenTypeInfo CreateMessage(SyntaxTree syntaxTree, SemanticModel semanticModel)
{
var tokens = syntaxTree.GetRoot().DescendantTokens();
var identifierTokenKind = Language.SyntaxKind.IdentifierToken; // Performance optimization
var skipIdentifierTokens = tokens.Count(token => Language.Syntax.IsKind(token, identifierTokenKind)) > IdentifierTokenCountThreshold;

var spans = new List<TokenTypeInfo.Types.TokenInfo>();
var skipIdentifierTokens = tokens
.Where(token => Language.Syntax.IsKind(token, identifierTokenKind))
.Skip(IdentifierTokenCountThreshold)
.Any();

var tokenClassifier = GetTokenClassifier(semanticModel, skipIdentifierTokens);
var triviaClassifier = GetTriviaClassifier();
var spans = new List<TokenInfo>();
// Enumerating the tokens a second time is intentional: the enumeration itself does no processing, and it avoids copying all the tokens into a second collection.
foreach (var token in tokens)
{
spans.AddRange(GetTokenClassifier(token, semanticModel, skipIdentifierTokens).Spans);
if (token.HasLeadingTrivia)
{
IterateTrivia(triviaClassifier, spans, token.LeadingTrivia);
}
if (tokenClassifier.ClassifyToken(token) is { } tokenClassification)
{
spans.Add(tokenClassification);
}
if (token.HasTrailingTrivia)
{
IterateTrivia(triviaClassifier, spans, token.TrailingTrivia);
}
}

var tokenTypeInfo = new TokenTypeInfo
{
FilePath = syntaxTree.FilePath
};

tokenTypeInfo.TokenInfo.AddRange(spans.OrderBy(s => s.TextRange.StartLine).ThenBy(s => s.TextRange.StartOffset));
tokenTypeInfo.TokenInfo.AddRange(spans);
return tokenTypeInfo;

// Appends to spans the classification of every trivia in triviaList that the classifier recognizes;
// trivia for which ClassifyTrivia returns null are skipped.
static void IterateTrivia(TriviaClassifierBase triviaClassifier, List<TokenInfo> spans, SyntaxTriviaList triviaList)
{
    foreach (var item in triviaList)
    {
        var classified = triviaClassifier.ClassifyTrivia(item);
        if (classified is null)
        {
            continue;
        }

        spans.Add(classified);
    }
}
}

protected abstract class TokenClassifierBase
{
private readonly SyntaxToken token;
private readonly SemanticModel semanticModel;
private readonly bool skipIdentifiers;
private readonly List<TokenTypeInfo.Types.TokenInfo> spans = new();
private static readonly ISet<MethodKind> ConstructorKinds = new HashSet<MethodKind>
{
MethodKind.Constructor,
Expand All @@ -80,130 +107,88 @@ protected abstract class TokenClassifierBase
};

protected abstract SyntaxNode GetBindableParent(SyntaxToken token);
protected abstract bool IsDocComment(SyntaxTrivia trivia);
protected abstract bool IsRegularComment(SyntaxTrivia trivia);
protected abstract bool IsKeyword(SyntaxToken token);
protected abstract bool IsIdentifier(SyntaxToken token);
protected abstract bool IsNumericLiteral(SyntaxToken token);
protected abstract bool IsStringLiteral(SyntaxToken token);

protected TokenClassifierBase(SyntaxToken token, SemanticModel semanticModel, bool skipIdentifiers)
protected TokenClassifierBase(SemanticModel semanticModel, bool skipIdentifiers)
{
this.token = token;
this.semanticModel = semanticModel;
this.skipIdentifiers = skipIdentifiers;
}

public IEnumerable<TokenTypeInfo.Types.TokenInfo> Spans
{
get
public TokenInfo ClassifyToken(SyntaxToken token) =>
token switch
{
spans.Clear();
ClassifyToken();
_ when IsKeyword(token) => TokenInfo(token, TokenType.Keyword),
_ when IsStringLiteral(token) => TokenInfo(token, TokenType.StringLiteral),
_ when IsNumericLiteral(token) => TokenInfo(token, TokenType.NumericLiteral),
_ when IsIdentifier(token) && !skipIdentifiers => ClassifyIdentifier(token),
_ => null,
};

foreach (var trivia in token.LeadingTrivia)
private static TokenInfo TokenInfo(SyntaxToken token, TokenType tokenType) =>
string.IsNullOrWhiteSpace(token.ValueText)
? null
: new()
{
ClassifyTrivia(trivia);
}

foreach (var trivia in token.TrailingTrivia)
{
ClassifyTrivia(trivia);
}

return spans;
}
}

private void CollectClassified(TokenType tokenType, TextSpan span)
{
if (string.IsNullOrWhiteSpace(token.SyntaxTree.GetText().GetSubText(span).ToString()))
{
return;
}

spans.Add(new TokenTypeInfo.Types.TokenInfo
{
TokenType = tokenType,
TextRange = GetTextRange(Location.Create(token.SyntaxTree, span).GetLineSpan())
});
}
TokenType = tokenType,
TextRange = GetTextRange(token.GetLocation().GetLineSpan()),
};

private void ClassifyToken()
private TokenInfo ClassifyIdentifier(SyntaxToken token)
{
if (IsKeyword(token))
{
CollectClassified(TokenType.Keyword, token.Span);
}
else if (IsStringLiteral(token))
if (semanticModel.GetDeclaredSymbol(token.Parent) is { } declaration)
{
CollectClassified(TokenType.StringLiteral, token.Span);
return ClassifyIdentifier(token, declaration);
}
else if (IsNumericLiteral(token))
else if (GetBindableParent(token) is { } parent && semanticModel.GetSymbolInfo(parent).Symbol is { } symbol)
{
CollectClassified(TokenType.NumericLiteral, token.Span);
return ClassifyIdentifier(token, symbol);
}
else if (IsIdentifier(token) && !skipIdentifiers)
else
{
ClassifyIdentifier();
return null;
}
}

private void ClassifyIdentifier()
{
if (semanticModel.GetDeclaredSymbol(token.Parent) is { } declaration)
{
ClassifyIdentifier(declaration);
}
else if (GetBindableParent(token) is { } parent && semanticModel.GetSymbolInfo(parent).Symbol is { } symbol)
{
ClassifyIdentifier(symbol);
}
}
/// <summary>
/// Classifies an identifier token according to the symbol it is associated with.
/// </summary>
/// <param name="token">The identifier token to classify.</param>
/// <param name="symbol">The symbol the token was resolved to. Alias symbols are unwrapped to their target.</param>
/// <returns>
/// A <see cref="TokenInfo"/> tagged as type name or keyword, or <c>null</c> when the identifier gets no classification.
/// </returns>
private TokenInfo ClassifyIdentifier(SyntaxToken token, ISymbol symbol) =>
    symbol switch
    {
        // Classify by what the alias points to, not by the alias itself.
        IAliasSymbol alias => ClassifyIdentifier(token, alias.Target),
        IMethodSymbol ctorSymbol when ConstructorKinds.Contains(ctorSymbol.MethodKind) => TokenInfo(token, TokenType.TypeName),
        // Compare the source text (Text), not ValueText: ValueText drops the verbatim prefix,
        // so an escaped identifier such as "@var" or "@value" would wrongly be reported as a keyword.
        _ when token.Text == "var" && VarSymbolKinds.Contains(symbol.Kind) => TokenInfo(token, TokenType.Keyword),
        { Kind: SymbolKind.Parameter, IsImplicitlyDeclared: true } when token.Text == "value" => TokenInfo(token, TokenType.Keyword),
        { Kind: SymbolKind.NamedType or SymbolKind.TypeParameter } => TokenInfo(token, TokenType.TypeName),
        { Kind: SymbolKind.DynamicType } => TokenInfo(token, TokenType.Keyword),
        _ => null,
    };
}

private void ClassifyIdentifier(ISymbol symbol)
{
if (symbol.Kind == SymbolKind.Alias)
{
ClassifyIdentifier(((IAliasSymbol)symbol).Target);
}
else if (symbol is IMethodSymbol ctorSymbol && ConstructorKinds.Contains(ctorSymbol.MethodKind))
{
CollectClassified(TokenType.TypeName, token.Span);
}
else if (token.ToString() == "var" && VarSymbolKinds.Contains(symbol.Kind))
{
CollectClassified(TokenType.Keyword, token.Span);
}
else if (token.ToString() == "value" && symbol.Kind == SymbolKind.Parameter && symbol.IsImplicitlyDeclared)
{
CollectClassified(TokenType.Keyword, token.Span);
}
else if (symbol.Kind == SymbolKind.NamedType || symbol.Kind == SymbolKind.TypeParameter)
{
CollectClassified(TokenType.TypeName, token.Span);
}
else if (symbol.Kind == SymbolKind.DynamicType)
{
CollectClassified(TokenType.Keyword, token.Span);
}
}
protected abstract class TriviaClassifierBase
{
protected abstract bool IsDocComment(SyntaxTrivia trivia);
protected abstract bool IsRegularComment(SyntaxTrivia trivia);

private void ClassifyTrivia(SyntaxTrivia trivia)
{
if (IsRegularComment(trivia))
public TokenInfo ClassifyTrivia(SyntaxTrivia trivia) =>
trivia switch
{
CollectClassified(TokenType.Comment, trivia.Span);
}
else if (IsDocComment(trivia))
_ when IsRegularComment(trivia) => TokenInfo(trivia.SyntaxTree, TokenType.Comment, trivia.Span),
_ when IsDocComment(trivia) => ClassifyDocComment(trivia),
// Handle preprocessor directives here
_ => null,
};

private TokenInfo TokenInfo(SyntaxTree tree, TokenType tokenType, TextSpan span) =>
new()
{
ClassifyDocComment(trivia);
}
// Handle preprocessor directives here
}
TokenType = tokenType,
TextRange = GetTextRange(Location.Create(tree, span).GetLineSpan())
};

private void ClassifyDocComment(SyntaxTrivia trivia) =>
CollectClassified(TokenType.Comment, trivia.FullSpan);
// Documentation comments are reported as Comment over the trivia's FullSpan (regular comments use Span).
private TokenInfo ClassifyDocComment(SyntaxTrivia trivia)
{
    var tree = trivia.SyntaxTree;
    var fullSpan = trivia.FullSpan;
    return TokenInfo(tree, TokenType.Comment, fullSpan);
}
}
}
}
Expand Up @@ -25,12 +25,15 @@ public class TokenTypeAnalyzer : TokenTypeAnalyzerBase<SyntaxKind>
{
protected override ILanguageFacade<SyntaxKind> Language { get; } = VisualBasicFacade.Instance;

protected override TokenClassifierBase GetTokenClassifier(SyntaxToken token, SemanticModel semanticModel, bool skipIdentifierTokens) =>
new TokenClassifier(token, semanticModel, skipIdentifierTokens);
/// <summary>
/// Creates the Visual Basic token classifier. The classifier is no longer bound to a single token; tokens are passed to it one at a time.
/// </summary>
/// <param name="semanticModel">Semantic model handed to the classifier.</param>
/// <param name="skipIdentifierTokens">Forwarded to the classifier as its skip-identifiers flag.</param>
protected override TokenClassifierBase GetTokenClassifier(SemanticModel semanticModel, bool skipIdentifierTokens)
{
    return new TokenClassifier(semanticModel, skipIdentifierTokens);
}

/// <summary>
/// Creates the Visual Basic trivia classifier.
/// </summary>
protected override TriviaClassifierBase GetTriviaClassifier()
{
    return new TriviaClassifier();
}

private sealed class TokenClassifier : TokenClassifierBase
{
public TokenClassifier(SyntaxToken token, SemanticModel semanticModel, bool skipIdentifiers) : base(token, semanticModel, skipIdentifiers) { }
public TokenClassifier(SemanticModel semanticModel, bool skipIdentifiers) : base(semanticModel, skipIdentifiers) { }

protected override SyntaxNode GetBindableParent(SyntaxToken token) =>
token.GetBindableParent();
Expand All @@ -41,9 +44,6 @@ private sealed class TokenClassifier : TokenClassifierBase
protected override bool IsKeyword(SyntaxToken token) =>
SyntaxFacts.IsKeywordKind(token.Kind());

protected override bool IsRegularComment(SyntaxTrivia trivia) =>
trivia.IsKind(SyntaxKind.CommentTrivia);

protected override bool IsNumericLiteral(SyntaxToken token) =>
token.IsAnyKind(SyntaxKind.DecimalLiteralToken, SyntaxKind.FloatingLiteralToken, SyntaxKind.IntegerLiteralToken);

Expand All @@ -53,6 +53,12 @@ private sealed class TokenClassifier : TokenClassifierBase
SyntaxKind.CharacterLiteralToken,
SyntaxKind.InterpolatedStringTextToken,
SyntaxKind.EndOfInterpolatedStringToken);
}

private sealed class TriviaClassifier : TriviaClassifierBase
{
/// <summary>
/// Returns true when the trivia is a Visual Basic comment (CommentTrivia).
/// </summary>
protected override bool IsRegularComment(SyntaxTrivia trivia)
{
    return trivia.IsKind(SyntaxKind.CommentTrivia);
}

/// <summary>
/// Returns true when the trivia is a Visual Basic documentation comment (DocumentationCommentTrivia).
/// </summary>
protected override bool IsDocComment(SyntaxTrivia trivia)
{
    return trivia.IsKind(SyntaxKind.DocumentationCommentTrivia);
}
Expand Down

0 comments on commit 2eb0a0c

Please sign in to comment.