From 5cf5e7a936b602357ad61edb1fe23be49cb128d8 Mon Sep 17 00:00:00 2001 From: Slawomir Jaranowski <s.jaranowski@gmail.com> Date: Fri, 17 May 2024 14:29:16 +0200 Subject: [PATCH 1/5] Testing special chars in xml output --- .../org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java b/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java index 7f7f321..6ce7a2d 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java @@ -82,6 +82,8 @@ private String createExpectedXML(boolean escape) { buf.append(LS); buf.append(" </el6>"); buf.append(LS); + buf.append(" <el8>special-char-" + (char) 7 + "</el8>"); + buf.append(LS); buf.append("</root>"); return buf.toString(); @@ -119,6 +121,10 @@ private Xpp3Dom createXpp3Dom() { el7.setValue("element7\n&\"\'<>"); el6.addChild(el7); + Xpp3Dom el8 = new Xpp3Dom("el8"); + el8.setValue("special-char-" + (char) 7); + + dom.addChild(el8); return dom; } } From 386daf0629c2090271a8d0b872563d6d5a923b60 Mon Sep 17 00:00:00 2001 From: Michael Osipov <1983-01-06@gmx.net> Date: Sat, 18 May 2024 21:59:20 +0200 Subject: [PATCH 2/5] Naive solution --- .../plexus/util/xml/PrettyPrintXMLWriter.java | 17 ++++++++++++++++- .../plexus/util/xml/pull/MXSerializer.java | 18 +++++------------- .../plexus/util/xml/Xpp3DomWriterTest.java | 6 +++++- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java index e089c5c..66ef295 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java +++ b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java @@ -186,7 +186,7 @@ private void writeText(String text, boolean escapeXml) { finishTag(); if (escapeXml) { - text = 
escapeXml(text); + text = escapeXmlText(text); } write(StringUtils.unifyLineSeparators(text, lineSeparator)); @@ -228,6 +228,8 @@ private static String escapeXml(String text) { private static final Pattern lowers = Pattern.compile("([\000-\037])"); + private static final Pattern lowersText = Pattern.compile("([\000-\010\013-\014\016-\037])"); + private static String escapeXmlAttribute(String text) { text = escapeXml(text); @@ -247,6 +249,19 @@ private static String escapeXmlAttribute(String text) { return b.toString(); } + private static String escapeXmlText(String text) { + text = escapeXml(text); + + Matcher m = lowersText.matcher(text); + StringBuffer b = new StringBuffer(); + while (m.find()) { + m = m.appendReplacement(b, "&#" + Integer.toString(m.group(1).charAt(0)) + ";"); + } + m.appendTail(b); + + return b.toString(); + } + /** {@inheritDoc} */ @Override public void addAttribute(String key, String value) { diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java index e69d28f..75882ff 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java @@ -943,19 +943,11 @@ protected void writeElementContent(String text, Writer out) throws IOException { // out.write(';'); // pos = i + 1; } else { - throw new IllegalStateException( - "character " + Integer.toString(ch) + " is not allowed in output" + getLocation()); - // in XML 1.1 legal are [#x1-#xD7FF] - // if(ch > 0) { - // if(i > pos) out.write(text.substring(pos, i)); - // out.write("&#"); - // out.write(Integer.toString(ch)); - // out.write(';'); - // pos = i + 1; - // } else { - // throw new IllegalStateException( - // "character zero is not allowed in XML 1.1 output"+getLocation()); - // } + if (i > pos) out.write(text.substring(pos, i)); + out.write("&#"); + out.write(Integer.toString(ch)); + out.write(';'); + pos = i + 1; } } if 
(seenBracket) { diff --git a/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java b/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java index 6ce7a2d..ec6af29 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java @@ -82,7 +82,11 @@ private String createExpectedXML(boolean escape) { buf.append(LS); buf.append(" </el6>"); buf.append(LS); - buf.append(" <el8>special-char-" + (char) 7 + "</el8>"); + if (escape) { + buf.append(" <el8>special-char-</el8>"); + } else { + buf.append(" <el8>special-char-" + (char) 7 + "</el8>"); + } buf.append(LS); buf.append("</root>"); From 23329e7660e149a2ee35758d8c818aeb3d317632 Mon Sep 17 00:00:00 2001 From: Slawomir Jaranowski <s.jaranowski@gmail.com> Date: Sun, 19 May 2024 18:18:38 +0200 Subject: [PATCH 3/5] Remove special chars from xml output --- .../plexus/util/xml/PrettyPrintXMLWriter.java | 4 +- .../plexus/util/xml/pull/MXSerializer.java | 4 +- .../plexus/util/xml/Xpp3DomWriterTest.java | 4 +- .../util/xml/pull/MXSerializerTest.java | 58 +++++++++++++++++++ 4 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java diff --git a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java index 66ef295..99a8cba 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java +++ b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java @@ -231,7 +231,7 @@ private static String escapeXml(String text) { private static final Pattern lowersText = Pattern.compile("([\000-\010\013-\014\016-\037])"); private static String escapeXmlAttribute(String text) { - text = escapeXml(text); + text = escapeXmlText(text); // Windows Matcher crlfmatcher = crlf.matcher(text); @@ -255,7 +255,7 @@ private static String escapeXmlText(String text) 
{ Matcher m = lowersText.matcher(text); StringBuffer b = new StringBuffer(); while (m.find()) { - m = m.appendReplacement(b, "&#" + Integer.toString(m.group(1).charAt(0)) + ";"); + m = m.appendReplacement(b, ""); } m.appendTail(b); diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java index 75882ff..0f0d222 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java @@ -943,10 +943,8 @@ protected void writeElementContent(String text, Writer out) throws IOException { // out.write(';'); // pos = i + 1; } else { + // skip special char if (i > pos) out.write(text.substring(pos, i)); - out.write("&#"); - out.write(Integer.toString(ch)); - out.write(';'); pos = i + 1; } } diff --git a/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java b/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java index ec6af29..233bdd5 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/Xpp3DomWriterTest.java @@ -83,7 +83,7 @@ private String createExpectedXML(boolean escape) { buf.append(" </el6>"); buf.append(LS); if (escape) { - buf.append(" <el8>special-char-</el8>"); + buf.append(" <el8>special-char-</el8>"); } else { buf.append(" <el8>special-char-" + (char) 7 + "</el8>"); } @@ -101,7 +101,7 @@ private Xpp3Dom createXpp3Dom() { dom.addChild(el1); Xpp3Dom el2 = new Xpp3Dom("el2"); - el2.setAttribute("att2", "attribute2\nnextline"); + el2.setAttribute("att2", "attribute2\nnextline" + (char) 7); dom.addChild(el2); Xpp3Dom el3 = new Xpp3Dom("el3"); diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java new file mode 100644 index 0000000..36a89d3 --- /dev/null +++ 
b/src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java @@ -0,0 +1,58 @@ +package org.codehaus.plexus.util.xml.pull; + +import java.io.StringReader; +import java.io.StringWriter; +import java.util.Arrays; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class MXSerializerTest { + + @Test + void testSerialize() throws Exception { + + StringWriter writer = new StringWriter(); + + MXSerializer sr = new MXSerializer(); + sr.setOutput(writer); + + sr.startDocument(null, Boolean.TRUE); + sr.startTag(null, "root"); + for (int i : Arrays.asList(8, 9, 10, 11, 13, 15)) { + sr.startTag(null, "char"); + sr.text(Character.getName(i) + ": " + ((char) i)); + sr.endTag(null, "char"); + } + + sr.endTag(null, "root"); + sr.endDocument(); + assertEquals(expectedOutput(), writer.toString()); + } + + @Test + void testDeserialize() throws Exception { + MXParser parser = new MXParser(); + parser.setInput(new StringReader(expectedOutput())); + int eventType = parser.getEventType(); + + while (eventType != XmlPullParser.END_DOCUMENT) { + eventType = parser.next(); + } + } + + private String expectedOutput() { + StringBuilder out = new StringBuilder(); + out.append("<?xml version=\"1.0\" standalone=\"yes\"?>"); + out.append("<root>"); + out.append("<char>BACKSPACE: </char>"); + out.append("<char>CHARACTER TABULATION: \t"); + out.append("</char><char>LINE FEED (LF): \n"); + out.append("</char><char>LINE TABULATION: </char>"); + out.append("<char>CARRIAGE RETURN (CR): \r"); + out.append("</char><char>SHIFT IN: </char>"); + out.append("</root>"); + return out.toString(); + } +} From 0b3cd5c8071208ca60e89db00c796e5c90e54a0d Mon Sep 17 00:00:00 2001 From: Slawomir Jaranowski <s.jaranowski@gmail.com> Date: Mon, 20 May 2024 09:07:15 +0200 Subject: [PATCH 4/5] Fix after review --- .../plexus/util/xml/PrettyPrintXMLWriter.java | 16 ++++++++-------- .../plexus/util/xml/pull/MXSerializer.java | 1 + 
.../plexus/util/xml/pull/MXSerializerTest.java | 10 +++++----- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java index 99a8cba..48bde11 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java +++ b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java @@ -24,9 +24,9 @@ import java.util.regex.Pattern; /** - * Implementation of XMLWriter which emits nicely formatted documents. - * + * <p>Implementation of XMLWriter which emits nicely formatted documents.</p> * + * <p>C0 controls chars are omitted from output</p> */ public class PrettyPrintXMLWriter implements XMLWriter { /** Line separator ("\n" on UNIX) */ @@ -226,9 +226,9 @@ private static String escapeXml(String text) { private static final Pattern crlf = Pattern.compile(crlf_str); - private static final Pattern lowers = Pattern.compile("([\000-\037])"); + private static final Pattern lowers = Pattern.compile("([\\x00-\\x1F])"); - private static final Pattern lowersText = Pattern.compile("([\000-\010\013-\014\016-\037])"); + private static final Pattern illegalC0Characters = Pattern.compile("([\\x00-\\x08\\x0B-\\x0C\\x0E-\\x1F])"); private static String escapeXmlAttribute(String text) { text = escapeXmlText(text); @@ -252,12 +252,12 @@ private static String escapeXmlAttribute(String text) { private static String escapeXmlText(String text) { text = escapeXml(text); - Matcher m = lowersText.matcher(text); + Matcher matcher = illegalC0Characters.matcher(text); StringBuffer b = new StringBuffer(); - while (m.find()) { - m = m.appendReplacement(b, ""); + while (matcher.find()) { + matcher = matcher.appendReplacement(b, ""); } - m.appendTail(b); + matcher.appendTail(b); return b.toString(); } diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java 
b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java index 0f0d222..580a878 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java @@ -24,6 +24,7 @@ * <li>PROPERTY_SERIALIZER_INDENTATION * <li>PROPERTY_SERIALIZER_LINE_SEPARATOR * </ul> + * <p>C0 controls chars are omitted from output</p> */ public class MXSerializer implements XmlSerializer { protected static final String XML_URI = "http://www.w3.org/XML/1998/namespace"; diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java index 36a89d3..8b66c23 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXSerializerTest.java @@ -47,11 +47,11 @@ private String expectedOutput() { out.append("<?xml version=\"1.0\" standalone=\"yes\"?>"); out.append("<root>"); out.append("<char>BACKSPACE: </char>"); - out.append("<char>CHARACTER TABULATION: \t"); - out.append("</char><char>LINE FEED (LF): \n"); - out.append("</char><char>LINE TABULATION: </char>"); - out.append("<char>CARRIAGE RETURN (CR): \r"); - out.append("</char><char>SHIFT IN: </char>"); + out.append("<char>CHARACTER TABULATION: \t</char>"); + out.append("<char>LINE FEED (LF): \n</char>"); + out.append("<char>LINE TABULATION: </char>"); + out.append("<char>CARRIAGE RETURN (CR): \r</char>"); + out.append("<char>SHIFT IN: </char>"); out.append("</root>"); return out.toString(); } From 39f779e53b7fd7f18ce2554d087e581a1fa181c9 Mon Sep 17 00:00:00 2001 From: Slawomir Jaranowski <s.jaranowski@gmail.com> Date: Mon, 20 May 2024 15:36:24 +0200 Subject: [PATCH 5/5] Fix after review --- .../java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java | 2 +- .../java/org/codehaus/plexus/util/xml/pull/MXSerializer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java index 48bde11..cd35289 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java +++ b/src/main/java/org/codehaus/plexus/util/xml/PrettyPrintXMLWriter.java @@ -26,7 +26,7 @@ /** * <p>Implementation of XMLWriter which emits nicely formatted documents.</p> * - * <p>C0 controls chars are omitted from output</p> + * <p>C0 control characters except <code>\n</code>, <code>\r</code>, and <code>\t</code> are omitted from output</p> */ public class PrettyPrintXMLWriter implements XMLWriter { /** Line separator ("\n" on UNIX) */ diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java index 580a878..ffd0ede 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java @@ -24,7 +24,7 @@ * <li>PROPERTY_SERIALIZER_INDENTATION * <li>PROPERTY_SERIALIZER_LINE_SEPARATOR * </ul> - * <p>C0 controls chars are omitted from output</p> + * <p>C0 control characters except <code>\n</code>, <code>\r</code>, and <code>\t</code> are omitted from output</p> */ public class MXSerializer implements XmlSerializer { protected static final String XML_URI = "http://www.w3.org/XML/1998/namespace";