Skip to content

Commit

Permalink
Implement shell output format (#1645)
Browse files Browse the repository at this point in the history
* fix typo in a comment

* implement shell output format

* fix a typo

* add two test cases, have source uses ascii only

* add integration tests and documentation

* add fixes after code revieew
  • Loading branch information
giorgiga committed May 4, 2023
1 parent bbe3055 commit 80b42b8
Show file tree
Hide file tree
Showing 8 changed files with 438 additions and 3 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

# Documentation

The documentation is a bit of a mixed bag (sorry in advanced, I do plan on simplifying it...) - with some parts automatically generated and stiched together and some statically defined.
The documentation is a bit of a mixed bag (sorry in advance, I do plan on simplifying it...) - with some parts automatically generated and stitched together and some statically defined.

Documentation is written in markdown, and is published in the 'gitbook' branch.

Expand Down
2 changes: 2 additions & 0 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ func createEncoder(format yqlib.PrinterOutputFormat) (yqlib.Encoder, error) {
return yqlib.NewXMLEncoder(indent, yqlib.ConfiguredXMLPreferences), nil
case yqlib.TomlOutputFormat:
return yqlib.NewTomlEncoder(), nil
case yqlib.ShellVariablesOutputFormat:
return yqlib.NewShellVariablesEncoder(), nil
}
return nil, fmt.Errorf("invalid encoder: %v", format)
}
Expand Down
86 changes: 86 additions & 0 deletions pkg/yqlib/doc/usage/shellvariables.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@

## Encode shell variables
Note that comments are dropped and values will be enclosed in single quotes as needed.

Given a sample.yml file of:
```yaml
# comment
name: Mike Wazowski
eyes:
  color: turquoise
  number: 1
friends:
  - James P. Sullivan
  - Celia Mae
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
name='Mike Wazowski'
eyes_color=turquoise
eyes_number=1
friends_0='James P. Sullivan'
friends_1='Celia Mae'
```

## Encode shell variables: illegal variable names as key.
Keys that would be illegal as shell variable names are adapted.

Given a sample.yml file of:
```yaml
ascii_=_symbols: replaced with _
"ascii_ _controls": dropped (this example uses \t)
nonascii_א_characters: dropped
effrot_expeñded_tò_preserve_accented_latin_letters: moderate (via unicode NFKD)

```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
ascii___symbols='replaced with _'
ascii__controls='dropped (this example uses \t)'
nonascii__characters=dropped
effrot_expended_to_preserve_accented_latin_letters='moderate (via unicode NFKD)'
```

## Encode shell variables: empty values, arrays and maps
Empty values are encoded to empty variables, but empty arrays and maps are skipped.

Given a sample.yml file of:
```yaml
empty:
  value:
  array: []
  map: {}
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
empty_value=
```

## Encode shell variables: single quotes in values
Single quotes in values are encoded as '"'"' (close single quote, double-quoted single quote, open single quote).

Given a sample.yml file of:
```yaml
name: Miles O'Brien
```
then
```bash
yq -o=shell sample.yml
```
will output
```sh
name='Miles O'"'"'Brien'
```

153 changes: 153 additions & 0 deletions pkg/yqlib/encoder_shellvariables.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package yqlib

import (
"fmt"
"io"
"strings"
"unicode/utf8"

"golang.org/x/text/unicode/norm"
yaml "gopkg.in/yaml.v3"
)

// shellVariablesEncoder writes a yaml node tree as shell variable assignments,
// one "name=value" line per scalar. It holds no state.
type shellVariablesEncoder struct {
}

// NewShellVariablesEncoder returns an Encoder backed by a new
// shellVariablesEncoder.
func NewShellVariablesEncoder() Encoder {
return &shellVariablesEncoder{}
}

// CanHandleAliases always reports false.
// NOTE(review): presumably this tells the caller to resolve aliases before
// handing nodes to Encode - confirm against the Encoder interface contract.
func (pe *shellVariablesEncoder) CanHandleAliases() bool {
return false
}

// PrintDocumentSeparator is a no-op: it ignores the writer, writes nothing and
// always returns nil.
func (pe *shellVariablesEncoder) PrintDocumentSeparator(_ io.Writer) error {
return nil
}

// PrintLeadingContent is a no-op: both arguments are ignored, nothing is
// written and nil is always returned.
func (pe *shellVariablesEncoder) PrintLeadingContent(_ io.Writer, _ string) error {
return nil
}

// Encode writes node to writer as shell variable assignments.
//
// mapKeysToStrings is applied to node first (presumably to normalise
// non-string map keys - confirm against its definition), then the tree is
// walked starting from an empty variable name. Whatever error the walk
// produces is returned unchanged; nil means every line was written.
func (pe *shellVariablesEncoder) Encode(writer io.Writer, node *yaml.Node) error {
	mapKeysToStrings(node)
	return pe.doEncode(&writer, node, "")
}

// doEncode recursively writes node to *w, with path being the variable name
// built up so far ("" at the root). Comments on nodes are never written.
//
//   - document: recurses into its first content node with the same path
//   - alias:    recurses into the aliased node with the same path
//   - scalar:   writes a single "path=value" line, quoting the value as needed
//   - sequence: recurses into each element, extending path with its index
//   - mapping:  recurses into each value, extending path with its key
//
// Empty sequences and mappings therefore produce no output. The first error
// encountered stops the walk and is returned. Any other node kind yields an
// error.
func (pe *shellVariablesEncoder) doEncode(w *io.Writer, node *yaml.Node, path string) error {
	switch node.Kind {
	case yaml.DocumentNode:
		return pe.doEncode(w, node.Content[0], path)

	case yaml.AliasNode:
		return pe.doEncode(w, node.Alias, path)

	case yaml.ScalarNode:
		name := path
		if name == "" {
			// A bare scalar has no key to derive a name from. An empty name
			// ("=somevalue") would error out when sourced in a shell, and "_"
			// is a special shell variable that can't be assigned, so fall
			// back to a fixed name.
			name = "value"
		}
		line := name + "=" + quoteValue(node.Value) + "\n"
		_, err := io.WriteString(*w, line)
		return err

	case yaml.SequenceNode:
		for i := range node.Content {
			if err := pe.doEncode(w, node.Content[i], appendPath(path, i)); err != nil {
				return err
			}
		}
		return nil

	case yaml.MappingNode:
		// Content holds alternating key and value nodes.
		for i := 0; i < len(node.Content); i += 2 {
			keyNode, valueNode := node.Content[i], node.Content[i+1]
			if err := pe.doEncode(w, valueNode, appendPath(path, keyNode.Value)); err != nil {
				return err
			}
		}
		return nil
	}

	return fmt.Errorf("Unsupported node %v", node.Tag)
}

// appendPath returns the variable name obtained by extending cookedPath (an
// already sanitised name, "" at the root) with rawKey (a map key or sequence
// index, formatted with %v).
//
// Shell variable names must match
//
//	[a-zA-Z_]+[a-zA-Z0-9_]*
//
// While this is not mandated by POSIX, which is quite lenient, it is what
// shells (for example busybox ash, see endofname.c from
// https://git.busybox.net/busybox/tag/?h=1_36_0) allow in practice.
//
// Since yaml keys can contain basically any character, rawKey is processed as
// follows:
//
//  1. Unicode compatibility decomposition (NFKD) is applied: accented letters
//     become a base letter followed by combining accents, ligatures are
//     split, exponents become the corresponding digit, etc.
//  2. Non-ASCII characters and ASCII control characters (code point < 32 or
//     > 126) are discarded; thanks to step 1 this drops an accent but keeps
//     its unaccented base letter.
//  3. Every remaining character that is not a letter, digit or underscore is
//     replaced with "_".
//
// The result is joined to cookedPath with "_". For the root key only (empty
// cookedPath) nothing is joined; instead an underscore is prepended when the
// processed key does not start with [a-zA-Z_], e.g. when it starts with a
// digit.
//
// Note this is NOT a 1:1 mapping: distinct keys can yield the same name.
//
// XXX keys that end up empty are not treated specially: at the root they
// yield "_", elsewhere they leave a trailing "_".
func appendPath(cookedPath string, rawKey interface{}) string {
	decomposed := norm.NFKD.String(fmt.Sprintf("%v", rawKey))

	sanitise := func(r rune) rune {
		switch {
		case isAlphaNumericOrUnderscore(r):
			return r
		case r < 32 || r > 126:
			// A negative rune makes strings.Map drop the character.
			return -1
		default:
			return '_'
		}
	}
	key := strings.Map(sanitise, decomposed)

	if cookedPath != "" {
		return cookedPath + "_" + key
	}

	first, _ := utf8.DecodeRuneInString(key)
	if isAlphaOrUnderscore(first) {
		return key
	}
	return "_" + key
}

// quoteValue returns value in a form usable on the right-hand side of a shell
// assignment.
//
// A value made only of ASCII letters, digits and underscores (including the
// empty string) is returned as is. Anything else is wrapped in single quotes,
// with every embedded single quote rewritten as '"'"' (close the single
// quotes, emit a double-quoted single quote, reopen the single quotes).
func quoteValue(value string) string {
	firstUnsafe := strings.IndexFunc(value, func(r rune) bool {
		return !isAlphaNumericOrUnderscore(r)
	})
	if firstUnsafe < 0 {
		return value
	}
	return "'" + strings.ReplaceAll(value, "'", "'\"'\"'") + "'"
}

// isAlphaOrUnderscore reports whether r is an ASCII letter (a-z, A-Z) or an
// underscore.
func isAlphaOrUnderscore(r rune) bool {
	switch {
	case r == '_':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	}
	return false
}

// isAlphaNumericOrUnderscore reports whether r is an ASCII digit (0-9), an
// ASCII letter or an underscore.
func isAlphaNumericOrUnderscore(r rune) bool {
	if '0' <= r && r <= '9' {
		return true
	}
	return isAlphaOrUnderscore(r)
}
93 changes: 93 additions & 0 deletions pkg/yqlib/encoder_shellvariables_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package yqlib

import (
"bufio"
"bytes"
"strings"
"testing"

"github.com/mikefarah/yq/v4/test"
)

// assertEncodesTo decodes the first document found in yaml, encodes it with
// the shell variables encoder and asserts that the output, with one trailing
// newline removed, equals shellvars.
//
// Any failure while decoding, encoding or flushing fails the calling test
// through t rather than panicking, so one broken case is reported against the
// right test and does not abort the rest of the test binary.
func assertEncodesTo(t *testing.T, yaml string, shellvars string) {
	t.Helper()

	var output bytes.Buffer
	writer := bufio.NewWriter(&output)

	encoder := NewShellVariablesEncoder()
	inputs, err := readDocuments(strings.NewReader(yaml), "test.yml", 0, NewYamlDecoder(ConfiguredYamlPreferences))
	if err != nil {
		t.Fatalf("reading documents: %v", err)
	}

	// Guard against an input that decodes to no documents at all; without
	// this the dereference below would panic.
	first := inputs.Front()
	if first == nil {
		t.Fatal("no documents decoded from input")
	}

	node := first.Value.(*CandidateNode).Node
	if err := encoder.Encode(writer, node); err != nil {
		t.Fatalf("encoding: %v", err)
	}
	// The encoder writes through a buffered writer; a failed flush would
	// otherwise show up as a confusing mismatch in the assertion below.
	if err := writer.Flush(); err != nil {
		t.Fatalf("flushing output: %v", err)
	}

	test.AssertResult(t, shellvars, strings.TrimSuffix(output.String(), "\n"))
}

// A value made only of letters is emitted without quotes.
func TestShellVariablesEncoderNonquoting(t *testing.T) {
assertEncodesTo(t, "a: alice", "a=alice")
}

// A value containing a space is wrapped in single quotes.
func TestShellVariablesEncoderQuoting(t *testing.T) {
assertEncodesTo(t, "a: Lewis Carroll", "a='Lewis Carroll'")
}

// A single quote inside a value is emitted as '"'"' within the quoted value.
func TestShellVariablesEncoderQuotesQuoting(t *testing.T) {
assertEncodesTo(t, "a: Lewis Carroll's Alice", "a='Lewis Carroll'\"'\"'s Alice'")
}

// A trailing yaml comment does not appear in the output.
func TestShellVariablesEncoderStripComments(t *testing.T) {
assertEncodesTo(t, "a: Alice # comment", "a=Alice")
}

// Nested map keys are joined to the parent key with an underscore.
func TestShellVariablesEncoderMap(t *testing.T) {
assertEncodesTo(t, "a:\n b: Lewis\n c: Carroll", "a_b=Lewis\na_c=Carroll")
}

// Sequence elements contribute their zero-based index to the variable name.
func TestShellVariablesEncoderArray_Unwrapped(t *testing.T) {
assertEncodesTo(t, "a: [{n: Alice}, {n: Bob}]", "a_0_n=Alice\na_1_n=Bob")
}

// An ASCII control character in a key (here the yaml "\a" bell escape) is
// dropped from the variable name.
func TestShellVariablesEncoderKeyNonPrintable(t *testing.T) {
assertEncodesTo(t, `"be\all": ring!`, "bell='ring!'")
}

// Printable ASCII characters that are not letters, digits or underscores
// (here "-", " " and "=") are each replaced with an underscore in the name.
func TestShellVariablesEncoderKeyPrintableNonAlphaNumeric(t *testing.T) {
assertEncodesTo(t, `"b-e l=l": ring!`, "b_e_l_l='ring!'")
}

// An accented letter in a key (U+00E9) is reduced to its unaccented base
// letter in the variable name.
func TestShellVariablesEncoderKeyPrintableNonAscii(t *testing.T) {
assertEncodesTo(t, `"b\u00e9ll": ring!`, "bell='ring!'")
}

// A root key starting with a digit gets an underscore prepended.
func TestShellVariablesEncoderRootKeyStartingWithDigit(t *testing.T) {
assertEncodesTo(t, "1a: onea", "_1a=onea")
}

// A root key already starting with an underscore is left unchanged.
func TestShellVariablesEncoderRootKeyStartingWithUnderscore(t *testing.T) {
assertEncodesTo(t, "_key: value", "_key=value")
}

// A child key's leading underscore is kept in addition to the joining
// underscore, giving a double underscore in the name.
func TestShellVariablesEncoderChildStartingWithUnderscore(t *testing.T) {
assertEncodesTo(t, "root:\n _child: value", "root__child=value")
}

// A key with no value is emitted as an assignment with an empty right-hand
// side.
func TestShellVariablesEncoderEmptyValue(t *testing.T) {
assertEncodesTo(t, "empty:", "empty=")
}

// An empty sequence produces no output at all.
func TestShellVariablesEncoderEmptyArray(t *testing.T) {
assertEncodesTo(t, "empty: []", "")
}

// An empty map produces no output at all.
func TestShellVariablesEncoderEmptyMap(t *testing.T) {
assertEncodesTo(t, "empty: {}", "")
}

// A document that is a bare scalar is emitted under the fallback name "value".
func TestShellVariablesEncoderScalarNode(t *testing.T) {
assertEncodesTo(t, "some string", "value='some string'")
}
5 changes: 4 additions & 1 deletion pkg/yqlib/printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (
UriOutputFormat
ShOutputFormat
TomlOutputFormat
ShellVariablesOutputFormat
)

func OutputFormatFromString(format string) (PrinterOutputFormat, error) {
Expand All @@ -50,8 +51,10 @@ func OutputFormatFromString(format string) (PrinterOutputFormat, error) {
return XMLOutputFormat, nil
case "toml":
return TomlOutputFormat, nil
case "shell", "s", "sh":
return ShellVariablesOutputFormat, nil
default:
return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml]", format)
return 0, fmt.Errorf("unknown format '%v' please use [yaml|json|props|csv|tsv|xml|toml|shell]", format)
}
}

Expand Down

0 comments on commit 80b42b8

Please sign in to comment.