Skip to content

Commit

Permalink
Add --nul-output|-0 flag to separate elements with NUL characters
Browse files Browse the repository at this point in the history
This ensures reliable parsing of complex data (any binary content
except NUL chars) by separating the output of each member of the
`yq` root collection with a NUL char. As a safeguard, an error is
raised when NUL-separated output is requested and a value itself
contains NUL characters.
  • Loading branch information
vaab committed Feb 8, 2023
1 parent 88a6b20 commit e5785f0
Show file tree
Hide file tree
Showing 6 changed files with 304 additions and 3 deletions.
251 changes: 251 additions & 0 deletions acceptance_tests/nul-separator.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
#!/bin/bash

## shunit2 hook run before each test: remove YAML fixtures left behind by
## a previous test. `-f` keeps rm quiet when no fixture exists yet, and
## `|| true` keeps the hook from reporting failure if removal fails.
setUp() {
    rm -f test*.yml || true
}


## We need to compare files because arguments in bash
## can't contain NUL characters.
## Assert that two files hold identical bytes by comparing their `hd`
## hex dumps (a textual form that survives being passed as arguments).
##   $1  first file (a process substitution works too)
##   $2  second file
assertFileSame() {
    local fileA="$1" fileB="$2"
    local dumpA dumpB

    dumpA="$(hd < "$fileA")"
    dumpB="$(hd < "$fileB")"

    assertEquals "$dumpA" "$dumpB"
}


## Read one NUL-terminated field from stdin into each variable named in
## the arguments, stopping at the first empty argument.
## Succeeds only if every field was terminated by a NUL; a read that hits
## end of input first makes the function return non-zero (the variable
## still receives whatever text was read).
read-0() {
    local eof="" IFS=''
    until [ -z "$1" ]; do
        if ! read -r -d '' -- "$1"; then
            eof=1
        fi
        shift
    done
    test -z "$eof"
}


## Like read-0, but also captures a trailing, non NUL-terminated chunk of
## input (the tests use it to carry the producer's exit status).
##   $1     name of the variable that receives that trailing chunk
##   $2...  names of variables, each filled with one NUL-terminated field
## Returns 0 only when every requested field was NUL-terminated. Otherwise
## returns non-zero, and the variable named by $1 holds either the trailing
## chunk or "127" when input ended part-way through a group of fields.
read-0-err() {
local ret="$1" eof="" idx=0 last=
## Clear the variable named by $1 (fails if $1 is unset).
read -r -- "${ret?}" <<<""
shift
while [ "$1" ]; do
## read fails when input ends before a NUL delimiter is seen; the target
## variable then holds the unterminated text.
read -r -d '' -- "$1" || {
eof="$1"
## Only the first unterminated chunk is kept, together with the index
## of the field it landed in.
[ -z "${!ret}" ] && {
read -r -- "${ret?}" <<<"${!eof}"
last=$idx
}
}
((idx++))
shift
done
[ -z "$eof" ] || {
## The trailing chunk is expected in the first field of a group; anywhere
## else means the group was only partially filled.
if [ "$last" != 0 ]; then
echo "Error: read-0-err couldn't fill all value" >&2
read -r -- "${ret?}" <<<"127"
fi
false
}
}

## Run `./yq e -0` with the given expression, then print yq's exit status
## after its output (no trailing NUL or newline).
##   $1  yq expression
wyq() {
    local exp="$1"
    ./yq e -0 "$exp"
    printf "%s" "$?"
}

## Same as wyq, but passes -r=false to yq: run `./yq e -0 -r=false` with
## the given expression, then print yq's exit status after its output.
##   $1  yq expression
wyq-r() {
    local exp="$1"
    ./yq e -0 -r=false "$exp"
    printf "%s" "$?"
}

## Byte-level check of -0 output: the result of `.a, .b` must be exactly
## "foo", NUL, "bar", NUL.
testBasicUsageRaw() {
cat >test.yml <<EOL
a: foo
b: bar
EOL

## Expected bytes go through a file because NUL can't be held in a bash
## string.
printf "foo\0bar\0" >expected.out

assertFileSame <(cat expected.out) \
<(./yq e -0 '.a, .b' test.yml)

rm expected.out
}

## -0 output of two scalars can be split back into shell variables
## with read-0.
testBasicUsage() {
cat >test.yml <<EOL
a: foo
b: bar
EOL

## One NUL-terminated field per variable.
read-0 a b < <(./yq e -0 '.a, .b' test.yml)

assertEquals "foo" "$a"
assertEquals "bar" "$b"

}

## -0 combined with -o=json: each result is emitted as a JSON document
## and a multi-line document still arrives as a single field.
## NOTE(review): the nested lines of the fixture and of the expected JSON
## appear to have lost their leading indentation in this view -- confirm
## against the repository file.
testBasicUsageJson() {
cat >test.yml <<EOL
a:
x: foo
b: bar
EOL

read-0 a b < <(./yq e -0 -o=json '.a, .b' test.yml)

assertEquals '{
"x": "foo"
}' "$a"
assertEquals '"bar"' "$b"

}

## A value that itself contains a NUL character can't be emitted in -0
## mode with unwrapped scalars; with -r=false the values stay YAML-encoded
## and all three fields can be read.
## NOTE(review): the block-scalar lines under `c: |` appear to have lost
## their indentation in this view -- confirm against the repository file.
testFailWithValueContainingNUL() {
cat >test.yml <<EOL
a: "foo\u0000bar"
b: 1
c: |
wiz
boom
EOL

## read-0 returns non-zero when it can't fill all three variables with
## NUL-terminated fields.
read-0 a b c < <(./yq e -0 '.a, .b, .c' test.yml)
errlvl="$?"
assertNotEquals "0" "$errlvl"

## Using -r=false avoids the failure, but every value is then kept in
## its YAML representation.

read-0 a b c < <(./yq e -0 -r=false '.a, .b, .c' test.yml)
errlvl="$?"
assertEquals "0" "$errlvl"

assertEquals '"foo\0bar"' "$a"
assertEquals '1' "$b"
assertEquals '|
wiz
boom' "$c"
}

## Shows how a consumer can tell a clean end of stream from a yq failure:
## the producer prints yq's exit status after the NUL-separated output, so
## the status arrives as a final chunk that is not NUL-terminated.
testDistinguishBetweenEOFAndFailure() {
cat >test.yml <<EOL
- yay
- wiz
- "foo\0bar"
- hop
- pow
EOL

res=""
## When read-0 fails (input ended without a NUL), the assignment
## ret="$a" stores the unterminated chunk and the negation ends the loop.
while read-0 a || ! ret="$a"; do
res+="$a:"
done < <(./yq e -0 '.[]' test.yml; printf "%s" "$?")

## Only the first two elements were received before the status "1".
assertEquals "1" "$ret"
assertEquals "yay:wiz:" "$res"

## Same sequence without the NUL-containing element.
cat >test.yml <<EOL
- yay
- wiz
- hop
- pow
EOL

res=""
while read-0 a || ! ret="$a"; do
res+="$a:"
done < <(./yq e -0 '.[]' test.yml; printf "%s" "$?")
assertEquals "0" "$ret"
assertEquals "yay:wiz:hop:pow:" "$res"

}

## Same scenario as testDistinguishBetweenEOFAndFailure, except that the
## producer emits the exit status with `printf "$?"` (the status is used
## directly as the printf format string).
testDistinguishBetweenEOFAndFailure2() {
cat >test.yml <<EOL
- yay
- wiz
- "foo\0bar"
- hop
- pow
EOL

res=""
## On the final, unterminated chunk read-0 fails; ret="$a" captures it
## and the negation ends the loop.
while read-0 a || ! ret="$a"; do
res+="$a:"
done < <(./yq e -0 '.[]' test.yml; printf "$?")

assertEquals "1" "$ret"
assertEquals "yay:wiz:" "$res"

## Same sequence without the NUL-containing element.
cat >test.yml <<EOL
- yay
- wiz
- hop
- pow
EOL

res=""
while read-0 a || ! ret="$a"; do
res+="$a:"
done < <(./yq e -0 '.[]' test.yml; printf "$?")
assertEquals "0" "$ret"
assertEquals "yay:wiz:hop:pow:" "$res"

}

## Exercises the read-0-err / wyq helper pair: elements are consumed two
## at a time and the variable E receives the trailing status chunk.
testDistinguishBetweenEOFAndFailure3() {
cat >test.yml <<EOL
- yay
- wiz
- "foo\0bar"
- hop
- pow
EOL

res=""
while read-0-err E a b; do
res+="$a: $b;"
done < <(wyq '.[]' < test.yml)

## One complete pair was read, then the status "1" arrived.
assertEquals "1" "$E"
assertEquals "yay: wiz;" "$res"

## Even number of elements, none containing NUL.
cat >test.yml <<EOL
- yay
- wiz
- hop
- pow
EOL

res=""
while read-0-err E a b; do
res+="$a: $b;"
done < <(wyq '.[]' < test.yml)

assertEquals "0" "$E"
assertEquals "yay: wiz;hop: pow;" "$res"


## Odd number of elements: the last pair can't be completed, which
## read-0-err reports as 127.
cat >test.yml <<EOL
- yay
- wiz
- hop
- pow
- kwak
EOL

res=""
while read-0-err E a b; do
res+="$a: $b;"
done < <(wyq '.[]' < test.yml)

assertEquals "127" "$E"
assertEquals "yay: wiz;hop: pow;" "$res"

}


source ./scripts/shunit2
1 change: 1 addition & 0 deletions cmd/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ var colorsEnabled = false
var indent = 2
var noDocSeparators = false
var nullInput = false
var nulSepOutput = false
var verbose = false
var version = false
var prettyPrint = false
Expand Down
3 changes: 3 additions & 0 deletions cmd/evaluate_all_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ func evaluateAll(cmd *cobra.Command, args []string) (cmdError error) {
encoder := configureEncoder(format)

printer := yqlib.NewPrinter(encoder, printerWriter)
if nulSepOutput {
printer.SetNulSepOutput(true)
}

if frontMatter != "" {
frontMatterHandler := yqlib.NewFrontMatterHandler(args[0])
Expand Down
3 changes: 3 additions & 0 deletions cmd/evalute_sequence_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ func evaluateSequence(cmd *cobra.Command, args []string) (cmdError error) {
encoder := configureEncoder(format)

printer := yqlib.NewPrinter(encoder, printerWriter)
if nulSepOutput {
printer.SetNulSepOutput(true)
}

decoder, err := configureDecoder(false)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ yq -P sample.json
rootCmd.PersistentFlags().BoolVarP(&writeInplace, "inplace", "i", false, "update the file inplace of first file given.")
rootCmd.PersistentFlags().VarP(unwrapScalarFlag, "unwrapScalar", "r", "unwrap scalar, print the value with no quotes, colors or comments. Defaults to true for yaml")
rootCmd.PersistentFlags().Lookup("unwrapScalar").NoOptDefVal = "true"
rootCmd.PersistentFlags().BoolVarP(&nulSepOutput, "nul-output", "0", false, "Use NUL char to separate values. If unwrap scalar is also set, fail if unwrapped scalar contains NUL char.")

rootCmd.PersistentFlags().BoolVarP(&prettyPrint, "prettyPrint", "P", false, "pretty print, shorthand for '... style = \"\"'")
rootCmd.PersistentFlags().BoolVarP(&exitStatus, "exit-status", "e", false, "set exit status if there are no matches or null or false is returned")
Expand Down
48 changes: 45 additions & 3 deletions pkg/yqlib/printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package yqlib

import (
"bufio"
"bytes"
"container/list"
"fmt"
"io"
Expand All @@ -15,6 +16,7 @@ type Printer interface {
PrintedAnything() bool
//e.g. when given a front-matter doc, like jekyll
SetAppendix(reader io.Reader)
SetNulSepOutput(nulSepOutput bool)
}

type PrinterOutputFormat uint32
Expand Down Expand Up @@ -59,6 +61,7 @@ type resultsPrinter struct {
printedMatches bool
treeNavigator DataTreeNavigator
appendixReader io.Reader
nulSepOutput bool
}

func NewPrinter(encoder Encoder, printerWriter PrinterWriter) Printer {
Expand All @@ -67,9 +70,16 @@ func NewPrinter(encoder Encoder, printerWriter PrinterWriter) Printer {
printerWriter: printerWriter,
firstTimePrinting: true,
treeNavigator: NewDataTreeNavigator(),
nulSepOutput: false,
}
}

// SetNulSepOutput records whether results should be written in
// NUL-separated form and emits a debug log entry on every call.
func (p *resultsPrinter) SetNulSepOutput(nulSepOutput bool) {
	p.nulSepOutput = nulSepOutput

	log.Debug("Setting NUL separator output")
}

func (p *resultsPrinter) SetAppendix(reader io.Reader) {
p.appendixReader = reader
}
Expand All @@ -84,6 +94,16 @@ func (p *resultsPrinter) printNode(node *yaml.Node, writer io.Writer) error {
return p.encoder.Encode(writer, node)
}

// removeLastEOL drops a single trailing line terminator from b, if there
// is one. A final "\r\n" is removed as a pair; otherwise a lone final
// "\n" or "\r" is removed. Any other content is left untouched.
func removeLastEOL(b *bytes.Buffer) {
	content := b.Bytes()
	size := len(content)

	switch {
	case bytes.HasSuffix(content, []byte("\r\n")):
		b.Truncate(size - 2)
	case bytes.HasSuffix(content, []byte("\n")), bytes.HasSuffix(content, []byte("\r")):
		b.Truncate(size - 1)
	}
}

func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error {
log.Debug("PrintResults for %v matches", matchingNodes.Len())

Expand Down Expand Up @@ -128,18 +148,40 @@ func (p *resultsPrinter) PrintResults(matchingNodes *list.List) error {
}
}

if err := p.encoder.PrintLeadingContent(writer, mappedDoc.LeadingContent); err != nil {
var destination io.Writer = writer
tempBuffer := bytes.NewBuffer(nil)
if p.nulSepOutput {
destination = tempBuffer
}

if err := p.encoder.PrintLeadingContent(destination, mappedDoc.LeadingContent); err != nil {
return err
}

if err := p.printNode(mappedDoc.Node, writer); err != nil {
if err := p.printNode(mappedDoc.Node, destination); err != nil {
return err
}

if err := p.encoder.PrintLeadingContent(writer, mappedDoc.TrailingContent); err != nil {
if err := p.encoder.PrintLeadingContent(destination, mappedDoc.TrailingContent); err != nil {
return err
}

if p.nulSepOutput {
removeLastEOL(tempBuffer)
tempBufferBytes := tempBuffer.Bytes()
if bytes.IndexByte(tempBufferBytes, 0) != -1 {
return fmt.Errorf(
"Can't serialize value because it contains NUL char and you are using NUL separated output",
)
}
if _, err := writer.Write(tempBufferBytes); err != nil {
return err
}
if _, err := writer.Write([]byte{0}); err != nil {
return err
}
}

p.previousDocIndex = mappedDoc.Document
if err := writer.Flush(); err != nil {
return err
Expand Down

0 comments on commit e5785f0

Please sign in to comment.