Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

msgpack: truncate long string prefix refinements #167

Merged
merged 2 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 0 additions & 4 deletions cty/ctystrings/prefix.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ import (
// application can guarantee that the remainder of the string will not begin
// with combining marks then it is safe to instead just normalize the prefix
// string with [Normalize].
//
// Note that this function only takes into account normalization boundaries
// and does _not_ take into account grapheme cluster boundaries as defined
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this is a mistake, unless "takes into account grapheme cluster boundaries" is intended to have a different meaning here than in line 14.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what I was intending to say here is that this function trims off zero or more normalization chunks so as to avoid the possibility that the resulting string might end with one that can be the prefix of a grapheme cluster chunk. Or in other words, this function potentially violates the usual rule that cty treats grapheme clusters as indivisible units under substring operations; it in fact intentionally divides them because it's trying to detect the possibility of partial ones.

But I agree that this comment is more confusing than helpful in retrospect, so I'm fine with removing it and if some confusion arises later about this the discussion about it will hopefully suggest a clearer way to explain the tradeoff this function is making.

// by Unicode Standard Annex #29.
func SafeKnownPrefix(prefix string) string {
prefix = Normalize(prefix)

Expand Down
57 changes: 56 additions & 1 deletion cty/msgpack/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package msgpack

import (
"fmt"
"strings"
"testing"

"github.com/zclconf/go-cty/cty"
Expand Down Expand Up @@ -50,7 +51,6 @@ func TestRoundTrip(t *testing.T) {
cty.UnknownVal(cty.String).Refine().NotNull().StringPrefix("foo-").NewValue(),
cty.String,
},

{
cty.True,
cty.Bool,
Expand Down Expand Up @@ -350,3 +350,58 @@ func TestRoundTrip(t *testing.T) {
})
}
}

// Unknown values with very long string prefix refinements do not round-trip
// losslessly. If the prefix is longer than 256 bytes it will be truncated to
// a maximum of 256 bytes.
func TestRoundTrip_truncatesStringPrefixRefinement(t *testing.T) {
tests := []struct {
Value cty.Value
Type cty.Type
RoundTripValue cty.Value
}{
{
cty.UnknownVal(cty.String).Refine().StringPrefix(strings.Repeat("a", 1024)).NewValue(),
cty.String,
cty.UnknownVal(cty.String).Refine().StringPrefix(strings.Repeat("a", 255)).NewValue(),
},
{
cty.UnknownVal(cty.String).Refine().NotNull().StringPrefix(strings.Repeat("b", 1024)).NewValue(),
cty.String,
cty.UnknownVal(cty.String).Refine().NotNull().StringPrefix(strings.Repeat("b", 255)).NewValue(),
},
{
cty.UnknownVal(cty.String).Refine().StringPrefix(strings.Repeat("c", 255) + "-").NewValue(),
cty.String,
cty.UnknownVal(cty.String).Refine().StringPrefix(strings.Repeat("c", 255) + "-").NewValue(),
},
{
cty.UnknownVal(cty.String).Refine().StringPrefix(strings.Repeat("d", 255) + "🤷🤷").NewValue(),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to add comments for any of these that need it. For example, in this case the StringPrefix() call trims the final "🤷", then the remaining "🤷" is trimmed during serialisation.


cty.String,
cty.UnknownVal(cty.String).Refine().StringPrefix(strings.Repeat("d", 255)).NewValue(),
},
}

for _, test := range tests {
t.Run(fmt.Sprintf("%#v as %#v", test.Value, test.Type), func(t *testing.T) {
b, err := Marshal(test.Value, test.Type)
if err != nil {
t.Fatal(err)
}

t.Logf("encoded as %x", b)

got, err := Unmarshal(b, test.Type)
if err != nil {
t.Fatal(err)
}

if !got.RawEquals(test.RoundTripValue) {
t.Errorf(
"unexpected value after round-trip\ninput: %#v\nexpect: %#v\nresult: %#v",
test.Value, test.RoundTripValue, got)
}
})
}
}
11 changes: 11 additions & 0 deletions cty/msgpack/unknown.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/vmihailenco/msgpack/v5"
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/ctystrings"
)

type unknownType struct{}
Expand Down Expand Up @@ -85,6 +86,16 @@ func marshalUnknownValue(rng cty.ValueRange, path cty.Path, enc *msgpack.Encoder
}
case rng.TypeConstraint() == cty.String:
if prefix := rng.StringPrefix(); prefix != "" {
// To ensure the total size of the refinements blob does not exceed
// the limit set by our decoder, truncate the prefix string.
// We could allow up to 1018 bytes here if we assume that this
// refinement will only ever be combined with NotNull(), but there
// is no need for such long prefix refinements at the moment.
maxPrefixLength := 256
if len(prefix) > maxPrefixLength {
prefix = prefix[:maxPrefixLength-1]
prefix = ctystrings.SafeKnownPrefix(prefix)
}
mapLen++
refnEnc.EncodeInt(int64(unknownValStringPrefix))
refnEnc.EncodeString(prefix)
Expand Down