Skip to content

Commit

Permalink
Fixed xml encoding of ProcInst #1563, improved XML comment handling
Browse files Browse the repository at this point in the history
  • Loading branch information
mikefarah committed Mar 1, 2023
1 parent fdb1487 commit 2195df9
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 34 deletions.
12 changes: 11 additions & 1 deletion pkg/yqlib/decoder_xml.go
Expand Up @@ -7,6 +7,7 @@ import (
"errors"
"fmt"
"io"
"regexp"
"strings"
"unicode"

Expand Down Expand Up @@ -48,11 +49,19 @@ func (dec *xmlDecoder) createSequence(nodes []*xmlNode) (*yaml.Node, error) {
return yamlNode, nil
}

var decoderCommentPrefix = regexp.MustCompile(`(^|\n)([[:alpha:]])`)

func (dec *xmlDecoder) processComment(c string) string {
if c == "" {
return ""
}
return "#" + strings.TrimRight(c, " ")
//need to replace "cat " with "# cat"
// "\ncat\n" with "\n cat\n"
// ensure non-empty comments starting with newline have a space in front

replacement := decoderCommentPrefix.ReplaceAllString(c, "$1 $2")
replacement = "#" + strings.ReplaceAll(strings.TrimRight(replacement, " "), "\n", "\n#")
return replacement
}

func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
Expand All @@ -77,6 +86,7 @@ func (dec *xmlDecoder) createMap(n *xmlNode) (*yaml.Node, error) {
var err error

if i == 0 {
log.Debugf("head comment here")
labelNode.HeadComment = dec.processComment(n.HeadComment)

}
Expand Down
13 changes: 9 additions & 4 deletions pkg/yqlib/doc/usage/xml.md
Expand Up @@ -407,8 +407,10 @@ A best attempt is made to copy comments to xml.

Given a sample.yml file of:
```yaml
#
# header comment
# above_cat
#
cat: # inline_cat
# above_array
array: # inline_array
Expand All @@ -425,9 +427,11 @@ yq -o=xml '.' sample.yml
will output
```xml
<!--
header comment
above_cat
--><!-- inline_cat --><cat><!-- above_array inline_array -->
header comment
above_cat
-->
<!-- inline_cat -->
<cat><!-- above_array inline_array -->
<array>val1<!-- inline_val1 --></array>
<array><!-- above_val2 -->val2<!-- inline_val2 --></array>
</cat><!-- below_cat -->
Expand Down Expand Up @@ -489,7 +493,8 @@ yq -p=xml -o=xml '.' sample.xml
```
will output
```xml
<!-- before cat --><cat><!-- in cat before -->
<!-- before cat -->
<cat><!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x><!-- before y -->
Expand Down
84 changes: 60 additions & 24 deletions pkg/yqlib/encoder_xml.go
Expand Up @@ -19,8 +19,6 @@ type xmlEncoder struct {
leadingContent string
}

var commentPrefix = regexp.MustCompile(`(^|\n)\s*#`)

func NewXMLEncoder(indent int, prefs XmlPreferences) Encoder {
var indentString = ""

Expand All @@ -39,7 +37,7 @@ func (e *xmlEncoder) PrintDocumentSeparator(writer io.Writer) error {
}

func (e *xmlEncoder) PrintLeadingContent(writer io.Writer, content string) error {
e.leadingContent = commentPrefix.ReplaceAllString(content, "\n")
e.leadingContent = content
return nil
}

Expand All @@ -48,12 +46,39 @@ func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error {
// hack so we can manually add newlines to procInst and directives
e.writer = writer
encoder.Indent("", e.indentString)
var newLine xml.CharData = []byte("\n")

mapNode := unwrapDoc(node)
if mapNode.Tag == "!!map" {
// make sure <?xml .. ?> processing instructions are encoded first
for i := 0; i < len(mapNode.Content); i += 2 {
key := mapNode.Content[i]
value := mapNode.Content[i+1]

if key.Value == (e.prefs.ProcInstPrefix + "xml") {
name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1)
procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)}
if err := encoder.EncodeToken(procInst); err != nil {
return err
}
if _, err := e.writer.Write([]byte("\n")); err != nil {
log.Warning("Unable to write newline, skipping: %w", err)
}
}
}
}

if e.leadingContent != "" {

// remove first and last newlines if present
err := e.encodeComment(encoder, e.leadingContent)
if err != nil {
return err
}
err = encoder.EncodeToken(newLine)
if err != nil {
return err
}
}

switch node.Kind {
Expand Down Expand Up @@ -89,29 +114,12 @@ func (e *xmlEncoder) Encode(writer io.Writer, node *yaml.Node) error {
default:
return fmt.Errorf("unsupported type %v", node.Tag)
}
var charData xml.CharData = []byte("\n")
return encoder.EncodeToken(charData)

return encoder.EncodeToken(newLine)

}

func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) error {
// make sure <?xml .. ?> processing instructions are encoded first
for i := 0; i < len(node.Content); i += 2 {
key := node.Content[i]
value := node.Content[i+1]

if key.Value == (e.prefs.ProcInstPrefix + "xml") {
name := strings.Replace(key.Value, e.prefs.ProcInstPrefix, "", 1)
procInst := xml.ProcInst{Target: name, Inst: []byte(value.Value)}
if err := encoder.EncodeToken(procInst); err != nil {
return err
}
if _, err := e.writer.Write([]byte("\n")); err != nil {
log.Warning("Unable to write newline, skipping: %w", err)
}
}
}

err := e.encodeComment(encoder, headAndLineComment(node))
if err != nil {
return err
Expand All @@ -126,6 +134,13 @@ func (e *xmlEncoder) encodeTopLevelMap(encoder *xml.Encoder, node *yaml.Node) er
if err != nil {
return err
}
if headAndLineComment(key) != "" {
var newLine xml.CharData = []byte("\n")
err = encoder.EncodeToken(newLine)
if err != nil {
return err
}
}

if key.Value == (e.prefs.ProcInstPrefix + "xml") {
// dont double process these.
Expand Down Expand Up @@ -206,12 +221,33 @@ func (e *xmlEncoder) doEncode(encoder *xml.Encoder, node *yaml.Node, start xml.S
return fmt.Errorf("unsupported type %v", node.Tag)
}

var xmlEncodeMultilineCommentRegex = regexp.MustCompile(`(^|\n) *# ?(.*)`)
var xmlEncodeSingleLineCommentRegex = regexp.MustCompile(`^\s*#(.*)\n?`)
var chompRegexp = regexp.MustCompile(`\n$`)

func (e *xmlEncoder) encodeComment(encoder *xml.Encoder, commentStr string) error {
if commentStr != "" {
log.Debugf("encoding comment %v", commentStr)
if !strings.HasSuffix(commentStr, " ") {
log.Debugf("got comment [%v]", commentStr)
// multi line string
if len(commentStr) > 2 && strings.Contains(commentStr[1:len(commentStr)-1], "\n") {
commentStr = chompRegexp.ReplaceAllString(commentStr, "")
log.Debugf("chompRegexp [%v]", commentStr)
commentStr = xmlEncodeMultilineCommentRegex.ReplaceAllString(commentStr, "$1$2")
log.Debugf("processed multine [%v]", commentStr)
// if the first line is non blank, add a space
if commentStr[0] != '\n' && commentStr[0] != ' ' {
commentStr = " " + commentStr
}

} else {
commentStr = xmlEncodeSingleLineCommentRegex.ReplaceAllString(commentStr, "$1")
}

if !strings.HasSuffix(commentStr, " ") && !strings.HasSuffix(commentStr, "\n") {
commentStr = commentStr + " "
log.Debugf("added suffix [%v]", commentStr)
}
log.Debugf("encoding comment [%v]", commentStr)

var comment xml.Comment = []byte(commentStr)
err := encoder.EncodeToken(comment)
Expand Down
108 changes: 103 additions & 5 deletions pkg/yqlib/xml_test.go
Expand Up @@ -10,6 +10,29 @@ import (
"github.com/mikefarah/yq/v4/test"
)

const yamlInputWithProcInstAndHeadComment = `# cats
+p_xml: version="1.0"
this: is some xml`

const expectedXmlProcInstAndHeadComment = `<?xml version="1.0"?>
<!-- cats -->
<this>is some xml</this>
`

const xmlProcInstAndHeadCommentBlock = `<?xml version="1.0"?>
<!--
cats
-->
<this>is some xml</this>
`

const expectedYamlProcInstAndHeadCommentBlock = `#
# cats
#
+p_xml: version="1.0"
this: is some xml
`

const inputXMLWithComments = `
<!-- before cat -->
<cat>
Expand Down Expand Up @@ -128,7 +151,8 @@ cat:
# after cat
`

const expectedRoundtripXMLWithComments = `<!-- before cat --><cat><!-- in cat before -->
const expectedRoundtripXMLWithComments = `<!-- before cat -->
<cat><!-- in cat before -->
<x>3<!-- multi
line comment
for x --></x><!-- before y -->
Expand All @@ -139,8 +163,10 @@ in d before -->
</cat><!-- after cat -->
`

const yamlWithComments = `# header comment
const yamlWithComments = `#
# header comment
# above_cat
#
cat: # inline_cat
# above_array
array: # inline_array
Expand All @@ -151,9 +177,11 @@ cat: # inline_cat
`

const expectedXMLWithComments = `<!--
header comment
above_cat
--><!-- inline_cat --><cat><!-- above_array inline_array -->
header comment
above_cat
-->
<!-- inline_cat -->
<cat><!-- above_array inline_array -->
<array>val1<!-- inline_val1 --></array>
<array><!-- above_val2 -->val2<!-- inline_val2 --></array>
</cat><!-- below_cat -->
Expand Down Expand Up @@ -266,6 +294,41 @@ var xmlScenarios = []formatScenario{
input: "<root><cats><cat>quick</cat><cat>soft</cat><!-- kitty_comment--><cat>squishy</cat></cats></root>",
expected: "root:\n cats:\n cat:\n - quick\n - soft\n # kitty_comment\n\n - squishy\n",
},
{
description: "ProcInst with head comment",
skipDoc: true,
input: yamlInputWithProcInstAndHeadComment,
expected: expectedXmlProcInstAndHeadComment,
scenarioType: "encode",
},
{
description: "ProcInst with head comment round trip",
skipDoc: true,
input: expectedXmlProcInstAndHeadComment,
expected: expectedXmlProcInstAndHeadComment,
scenarioType: "roundtrip",
},
{
description: "ProcInst with block head comment to yaml",
skipDoc: true,
input: xmlProcInstAndHeadCommentBlock,
expected: expectedYamlProcInstAndHeadCommentBlock,
scenarioType: "decode",
},
{
description: "ProcInst with block head comment from yaml",
skipDoc: true,
input: expectedYamlProcInstAndHeadCommentBlock,
expected: xmlProcInstAndHeadCommentBlock,
scenarioType: "encode",
},
{
description: "ProcInst with head comment round trip block",
skipDoc: true,
input: xmlProcInstAndHeadCommentBlock,
expected: xmlProcInstAndHeadCommentBlock,
scenarioType: "roundtrip",
},
{
description: "Parse xml: simple",
subdescription: "Notice how all the values are strings, see the next example on how you can fix that.",
Expand Down Expand Up @@ -466,6 +529,41 @@ var xmlScenarios = []formatScenario{
expected: "<cat name=\"tiger\">cool</cat>\n",
scenarioType: "encode",
},
{
description: "round trip multiline 1",
skipDoc: true,
input: "<x><!-- cats --></x>\n",
expected: "<x><!-- cats --></x>\n",
scenarioType: "roundtrip",
},
{
description: "round trip multiline 2",
skipDoc: true,
input: "<x><!--\n cats\n --></x>\n",
expected: "<x><!--\ncats\n--></x>\n",
scenarioType: "roundtrip",
},
{
description: "round trip multiline 3",
skipDoc: true,
input: "<x><!--\n\tcats\n --></x>\n",
expected: "<x><!--\n\tcats\n--></x>\n",
scenarioType: "roundtrip",
},
{
description: "round trip multiline 4",
skipDoc: true,
input: "<x><!--\n\tcats\n\tdogs\n--></x>\n",
expected: "<x><!--\n\tcats\n\tdogs\n--></x>\n",
scenarioType: "roundtrip",
},
{
description: "round trip multiline 5",
skipDoc: true, // pity spaces aren't kept atm.
input: "<x><!--\ncats\ndogs\n--></x>\n",
expected: "<x><!--\ncats\ndogs\n--></x>\n",
scenarioType: "roundtrip",
},
{
description: "Encode xml: comments",
subdescription: "A best attempt is made to copy comments to xml.",
Expand Down

0 comments on commit 2195df9

Please sign in to comment.