diff --git a/acceptance_tests/nul-separator.sh b/acceptance_tests/nul-separator.sh new file mode 100755 index 0000000000..c503dcdcbb --- /dev/null +++ b/acceptance_tests/nul-separator.sh @@ -0,0 +1,286 @@ +#!/bin/bash + +setUp() { + rm test*.yml || true +} + +## Convenient bash shortcut to read records of NUL separated values +## from stdin the safe way. See example usage in the next tests. +read-0() { + local eof="" IFS='' + while [ "$1" ]; do + ## - The `-r` avoids bad surprise with '\n' and other interpreted + ## sequences that can be read. + ## - The `-d ''` is the (strange?) way to refer to NUL delimiter. + ## - The `--` is how to avoid unpleasant surprises if your + ## "$1" starts with "-" (minus) sign. This protection also + ## will produce a readable error if you want to try to start + ## your variable names with a "-". + read -r -d '' -- "$1" || eof=1 + shift + done + [ -z "$eof" ] ## fail on EOF +} + +## Convenient bash shortcut to be used with the next function `p-err` +## to read NUL separated values the safe way AND catch any errors from +## the process creating the stream of NUL separated data. See example +## usage in the tests. +read-0-err() { + local ret="$1" eof="" idx=0 last= + read -r -- "${ret?}" <<<"0" + shift + while [ "$1" ]; do + last=$idx + read -r -d '' -- "$1" || { + ## Put this last value in ${!ret} + eof="$1" + read -r -- "$ret" <<<"${!eof}" + break + } + ((idx++)) + shift + done + [ -z "$eof" ] || { + if [ "$last" != 0 ]; then + ## Uhoh, we have no idea if the errorlevel of the internal + ## command was properly delimited with a NUL char, and + ## anyway something went really wrong at least about the + ## number of fields separated by NUL char and the one + ## expected. + echo "Error: read-0-err couldn't fill all value $ret = '${!ret}', '$eof', '${!eof}'" >&2 + read -r -- "$ret" <<<"not-enough-values" + else + if ! [[ "${!ret}" =~ ^[0-9]+$ && "${!ret}" -ge 0 && "${!ret}" -le 127 ]]; then + ## This could happen if you don't use `p-err` wrapper, + ## or used stdout in unexpected ways in your inner + ## command. + echo "Error: last value is not a number, did you finish with an errorlevel ?" >&2 + read -r -- "$ret" <<<"last-value-not-a-number" + fi + fi + false + } +} + +## Simply runs command given as argument and adds errorlevel in the +## standard output. Is expected to be used in tandem with +## `read-0-err`. +p-err() { + local exp="$1" + "$@" + printf "%s" "$?" +} + +wyq-r() { + local exp="$1" + ./yq e -0 -r=false "$1" + printf "%s" "$?" +} + +testBasicUsageRaw() { + cat >test.yml < expected.out + + ## We need to compare binary content here. We have to filter the compared + ## content through a representation that gets rid of NUL chars but accurately + ## transcribe the content. + ## Also as it would be nice to have a pretty output in case the test fails, + ## we use here 'hd': a widely available shortcut to 'hexdump' that will + ## pretty-print any binary to it's hexadecimal representation. + ## + ## Note that the standard `assertEquals` compare its arguments + ## value, but they can't hold NUL characters (this comes from the + ## limitation of the C API of `exec*(..)` functions that requires + ## `const char *arv[]`). And these are NUL terminated strings. As a + ## consequence, the NUL characters gets removed in bash arguments. + assertEquals "$(hd expected.out)" \ + "$(./yq e -0 '.a, .b' test.yml | hd)" + + rm expected.out +} + +testBasicUsage() { + local a b + cat >test.yml <test.yml <test.yml <test.yml <test.yml <test.yml <test.yml <= 2 && data[n-2] == '\r' && data[n-1] == '\n' { + b.Truncate(n - 2) + } else if n >= 1 && (data[n-1] == '\r' || data[n-1] == '\n') { + b.Truncate(n - 1) + } +} + func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error { log.Debug("PrintResults for %v matches", matchingNodes.Len()) @@ -128,18 +148,40 @@ func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error { } } - if err := p.encoder.PrintLeadingContent(writer, mappedDoc.LeadingContent); err != nil { + var destination io.Writer = writer + tempBuffer := bytes.NewBuffer(nil) + if p.nulSepOutput { + destination = tempBuffer + } + + if err := p.encoder.PrintLeadingContent(destination, mappedDoc.LeadingContent); err != nil { return err } - if err := p.printNode(mappedDoc.Node, writer); err != nil { + if err := p.printNode(mappedDoc.Node, destination); err != nil { return err } - if err := p.encoder.PrintLeadingContent(writer, mappedDoc.TrailingContent); err != nil { + if err := p.encoder.PrintLeadingContent(destination, mappedDoc.TrailingContent); err != nil { return err } + if p.nulSepOutput { + removeLastEOL(tempBuffer) + tempBufferBytes := tempBuffer.Bytes() + if bytes.IndexByte(tempBufferBytes, 0) != -1 { + return fmt.Errorf( + "Can't serialize value because it contains NUL char and you are using NUL separated output", + ) + } + if _, err := writer.Write(tempBufferBytes); err != nil { + return err + } + if _, err := writer.Write([]byte{0}); err != nil { + return err + } + } + p.previousDocIndex = mappedDoc.Document if err := writer.Flush(); err != nil { return err diff --git a/pkg/yqlib/printer_test.go b/pkg/yqlib/printer_test.go index f3be640119..4b32f37166 100644 --- a/pkg/yqlib/printer_test.go +++ b/pkg/yqlib/printer_test.go @@ -340,3 +340,53 @@ func TestPrinterMultipleDocsJson(t *testing.T) { writer.Flush() test.AssertResult(t, expected, output.String()) } + +func TestPrinterNulSeparator(t *testing.T) { + var output bytes.Buffer + var writer = bufio.NewWriter(&output) + printer := NewSimpleYamlPrinter(writer, YamlOutputFormat, true, false, 2, false) + printer.SetNulSepOutput(true) + node, err := getExpressionParser().ParseExpression(".a") + if err != nil { + panic(err) + } + streamEvaluator := NewStreamEvaluator() + _, err = streamEvaluator.Evaluate("sample", strings.NewReader(multiDocSample), node, printer, NewYamlDecoder(ConfiguredYamlPreferences)) + if err != nil { + panic(err) + } + + writer.Flush() + expected := "banana\x00apple\x00coconut\x00" + test.AssertResult(t, expected, output.String()) +} + +func TestPrinterNulSeparatorWithJson(t *testing.T) { + var output bytes.Buffer + var writer = bufio.NewWriter(&output) + // note printDocSeparators is true, it should still not print document separators + // when outputing JSON. + encoder := NewJSONEncoder(0, false, false) + if encoder == nil { + t.Skipf("no support for %s output format", "json") + } + printer := NewPrinter(encoder, NewSinglePrinterWriter(writer)) + printer.SetNulSepOutput(true) + + inputs, err := readDocuments(strings.NewReader(multiDocSample), "sample.yml", 0, NewYamlDecoder(ConfiguredYamlPreferences)) + if err != nil { + panic(err) + } + + inputs.Front().Value.(*CandidateNode).LeadingContent = "# ignore this\n" + + err = printer.PrintResults(inputs) + if err != nil { + panic(err) + } + + expected := `{"a":"banana"}` + "\x00" + `{"a":"apple"}` + "\x00" + `{"a":"coconut"}` + "\x00" + + writer.Flush() + test.AssertResult(t, expected, output.String()) +}