Skip to content

Commit d50c583

Browse files
committed May 8, 2023
Remove CDATA preservation on read
This feature, introduced in v1.1.3, was implemented in a way that broke reading of XML documents encoded in non-UTF-8 character sets.
1 parent 211cdce commit d50c583

File tree

3 files changed

+8
-65
lines changed

3 files changed

+8
-65
lines changed
 

‎etree.go

+1-4
Original file line number | Diff line number | Diff line change
@@ -775,7 +775,6 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
775775
var stack stack
776776
stack.push(e)
777777
for {
778-
xr.ResetPeek(dec.InputOffset())
779778
t, err := dec.RawToken()
780779
switch {
781780
case err == io.EOF:
@@ -806,9 +805,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
806805
case xml.CharData:
807806
data := string(t)
808807
var flags charDataFlags
809-
if xr.PeekContainsCdata() {
810-
flags = cdataFlag
811-
} else if isWhitespace(data) {
808+
if isWhitespace(data) {
812809
flags = whitespaceFlag
813810
}
814811
newCharData(data, flags, top)

‎etree_test.go

+2-21
Original file line number | Diff line number | Diff line change
@@ -892,8 +892,8 @@ func TestIndentPreserveWhitespace(t *testing.T) {
892892
{"<test> </test>", "<test> </test>"},
893893
{"<test>\t</test>", "<test>\t</test>"},
894894
{"<test>\t\n \t</test>", "<test>\t\n \t</test>"},
895-
{"<test><![CDATA[ ]]></test>", "<test><![CDATA[ ]]></test>"},
896-
{"<test> <![CDATA[ ]]> </test>", "<test><![CDATA[ ]]></test>"},
895+
{"<test><![CDATA[ ]]></test>", "<test> </test>"},
896+
{"<test> <![CDATA[ ]]> </test>", "<test/>"},
897897
{"<outer> <inner> </inner> </outer>", "<outer>\n <inner> </inner>\n</outer>"},
898898
}
899899

@@ -1278,22 +1278,3 @@ func TestWhitespace(t *testing.T) {
12781278
cd.SetData("")
12791279
checkBoolEq(t, cd.IsWhitespace(), true)
12801280
}
1281-
1282-
func TestPreserveCDATA(t *testing.T) {
1283-
s := `<name><![CDATA[My]] <b>name</b> <![CDATA[is]]></name>`
1284-
1285-
doc := NewDocument()
1286-
err := doc.ReadFromString(s)
1287-
if err != nil {
1288-
t.Fatalf("etree: failed to ReadFromString: %v", err)
1289-
}
1290-
1291-
result, err := doc.WriteToString()
1292-
if err != nil {
1293-
t.Fatalf("etree: failed to WriteToString: %v", err)
1294-
}
1295-
1296-
if result != s {
1297-
t.Errorf("etree: wanted %q, got %q", s, result)
1298-
}
1299-
}

‎helpers.go

+5-40
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ package etree
66

77
import (
88
"bufio"
9-
"bytes"
109
"io"
1110
"strings"
1211
"unicode/utf8"
@@ -88,54 +87,20 @@ func (f *fifo) grow() {
8887
// bytes read from its encapsulated reader and detects when a CDATA
8988
// prefix has been parsed.
9089
type xmlReader struct {
91-
r io.ByteReader
90+
r io.Reader
9291
bytes int64
93-
peek []byte
94-
last byte
9592
}
9693

9794
var cdataPrefix = []byte("<![CDATA[")
9895

9996
func newXmlReader(r io.Reader) *xmlReader {
100-
return &xmlReader{
101-
r: bufio.NewReader(r),
102-
bytes: 0,
103-
peek: make([]byte, 0, len(cdataPrefix)),
104-
last: 0,
105-
}
97+
return &xmlReader{r, 0}
10698
}
10799

108100
func (xr *xmlReader) Read(p []byte) (n int, err error) {
109-
// Since xmlReader implements the io.ByteReader interface, the XML decoder
110-
// bypasses Read in favor of ReadByte.
111-
return 0, nil
112-
}
113-
114-
func (xr *xmlReader) ReadByte() (b byte, err error) {
115-
b, err = xr.r.ReadByte()
116-
if err == nil {
117-
xr.last = b
118-
xr.bytes += 1
119-
if len(xr.peek) < len(cdataPrefix) {
120-
xr.peek = append(xr.peek, b)
121-
}
122-
}
123-
return b, err
124-
}
125-
126-
func (xr *xmlReader) ResetPeek(decoderOffset int64) {
127-
xr.peek = xr.peek[0:0]
128-
129-
// If the decoder offset doesn't match the number of bytes read so far,
130-
// then the decoder performed an "unget" on the last byte read. Return
131-
// this byte to the front of the peek buffer.
132-
if decoderOffset != xr.bytes {
133-
xr.peek = append(xr.peek, xr.last)
134-
}
135-
}
136-
137-
func (xr *xmlReader) PeekContainsCdata() bool {
138-
return bytes.Equal(xr.peek, cdataPrefix)
101+
n, err = xr.r.Read(p)
102+
xr.bytes += int64(n)
103+
return n, err
139104
}
140105

141106
// xmlWriter implements a proxy writer that counts the number of

0 commit comments

Comments
 (0)
Please sign in to comment.