Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make it possible to keep comments #325

Merged
merged 1 commit into from
Jun 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions xml/src/main/scala/fs2/data/xml/XmlEvent.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,6 @@ object XmlEvent {

case object EndDocument extends XmlEvent

case class Comment(comment: String) extends XmlEvent

}
45 changes: 29 additions & 16 deletions xml/src/main/scala/fs2/data/xml/internals/EventParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ package internals

import text._

import cats.syntax.all._

import scala.collection.immutable.VectorBuilder

private[xml] object EventParser {
Expand All @@ -28,7 +30,8 @@ private[xml] object EventParser {

val valueDelimiters = " \t\r\n<&"

def pipe[F[_], T](implicit F: RaiseThrowable[F], T: CharLikeChunks[F, T]): Pipe[F, T, XmlEvent] = {
def pipe[F[_], T](
includeComments: Boolean)(implicit F: RaiseThrowable[F], T: CharLikeChunks[F, T]): Pipe[F, T, XmlEvent] = {

val eos = T.create(Stream.empty)

Expand Down Expand Up @@ -273,7 +276,7 @@ private[xml] object EventParser {
case '!' =>
peekChar(T.advance(ctx), chunkAcc).flatMap {
case Some((ctx, chunkAcc, '-')) =>
skipComment(T.advance(ctx), chunkAcc)
readComment(T.advance(ctx), chunkAcc)
case Some((ctx, chunkAcc, '[')) =>
readCDATA(T.advance(ctx), chunkAcc)
case Some((ctx, chunkAcc, _)) =>
Expand All @@ -293,25 +296,29 @@ private[xml] object EventParser {
}

/** We have read '<!-' so far */
def skipComment(
def readComment(
ctx: T.Context,
chunkAcc: VectorBuilder[XmlEvent]): Pull[F, XmlEvent, (T.Context, VectorBuilder[XmlEvent], MarkupToken)] =
acceptChar(ctx, '-', "15", "second dash missing to open comment", chunkAcc).flatMap { case (ctx, chunkAcc) =>
def loop(ctx: T.Context,
builder: StringBuilder,
chunkAcc: VectorBuilder[XmlEvent]): Pull[F, XmlEvent, (T.Context, VectorBuilder[XmlEvent])] =
nextChar(ctx, chunkAcc).flatMap {
case (ctx, chunkAcc, '-') =>
nextChar(ctx, chunkAcc).flatMap {
case (ctx, chunkAcc, '-') =>
acceptChar(ctx, '>', "15", "'--' is not inside comments", chunkAcc)
case (ctx, chunkAcc, _) =>
loop(ctx, chunkAcc)
case (ctx, chunkAcc, c) =>
if (includeComments) builder.append('-').append(c)
loop(ctx, builder, chunkAcc)
}
case (ctx, chunkAcc, _) =>
loop(ctx, chunkAcc)
case (ctx, chunkAcc, c) =>
if (includeComments) builder.append(c)
loop(ctx, builder, chunkAcc)
}
loop(ctx, chunkAcc).map { case (ctx, chunkAcc) =>
(ctx, chunkAcc, MarkupToken.CommentToken)
val builder = new StringBuilder
loop(ctx, builder, chunkAcc).map { case (ctx, chunkAcc) =>
(ctx, chunkAcc, MarkupToken.CommentToken(includeComments.guard[Option].as(builder.result())))
}
}

Expand Down Expand Up @@ -411,11 +418,13 @@ private[xml] object EventParser {
peekChar(ctx, chunkAcc).flatMap {
case Some((ctx, chunkAcc, '<')) =>
readMarkupToken(ctx, chunkAcc).flatMap {
case (ctx, chunkAcc, MarkupToken.CommentToken) => scanMisc(ctx, chunkAcc)
case res @ (_, _, MarkupToken.PIToken(_)) => Pull.pure(Some(res))
case res @ (_, _, MarkupToken.DeclToken(_)) => Pull.pure(Some(res))
case res @ (_, _, MarkupToken.StartToken(_)) => Pull.pure(Some(res))
case (_, chunkAcc, t) => fail("22", s"unexpected token '$t'", Some(chunkAcc))
case (ctx, chunkAcc, MarkupToken.CommentToken(None)) => scanMisc(ctx, chunkAcc)
case (ctx, chunkAcc, MarkupToken.CommentToken(Some(comment))) =>
scanMisc(ctx, chunkAcc += XmlEvent.Comment(comment))
case res @ (_, _, MarkupToken.PIToken(_)) => Pull.pure(Some(res))
case res @ (_, _, MarkupToken.DeclToken(_)) => Pull.pure(Some(res))
case res @ (_, _, MarkupToken.StartToken(_)) => Pull.pure(Some(res))
case (_, chunkAcc, t) => fail("22", s"unexpected token '$t'", Some(chunkAcc))
}
case Some((_, chunkAcc, c)) =>
fail("22", s"unexpected character '$c'", Some(chunkAcc))
Expand Down Expand Up @@ -667,8 +676,10 @@ private[xml] object EventParser {
peekChar(ctx, chunkAcc).flatMap {
case Some((ctx, chunkAcc, '<')) =>
readMarkupToken(ctx, chunkAcc).flatMap {
case (ctx, chunkAcc, MarkupToken.CommentToken) =>
case (ctx, chunkAcc, MarkupToken.CommentToken(None)) =>
readCharData(ctx, is11, chunkAcc)
case (ctx, chunkAcc, MarkupToken.CommentToken(Some(comment))) =>
readCharData(ctx, is11, chunkAcc += XmlEvent.Comment(comment))
case (_, chunkAcc, MarkupToken.DeclToken(n)) =>
fail("14", s"unexpected declaration '$n'", Some(chunkAcc))
case (ctx, chunkAcc, MarkupToken.CDataToken) =>
Expand Down Expand Up @@ -751,8 +762,10 @@ private[xml] object EventParser {
}
case (ctx, chunkAcc, MarkupToken.StartToken(name)) =>
readElement(ctx, false, name, chunkAcc).map(Some(_))
case (ctx, chunkAcc, MarkupToken.CommentToken) =>
case (ctx, chunkAcc, MarkupToken.CommentToken(None)) =>
scanPrologToken1(ctx, false, chunkAcc)
case (ctx, chunkAcc, MarkupToken.CommentToken(Some(comment))) =>
scanPrologToken1(ctx, false, chunkAcc += XmlEvent.Comment(comment))
case (_, chunkAcc, t) =>
fail("22", s"unexpected markup $t", Some(chunkAcc))
}
Expand Down
4 changes: 2 additions & 2 deletions xml/src/main/scala/fs2/data/xml/internals/MarkupToken.scala
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ private object MarkupToken {
def render: String = s"<!$name"
}

case object CommentToken extends MarkupToken {
def render: String = "<!-- ... -->"
case class CommentToken(content: Option[String]) extends MarkupToken {
def render: String = s"<!-- ${content.getOrElse("...")} -->"
}

case object CDataToken extends MarkupToken {
Expand Down
17 changes: 15 additions & 2 deletions xml/src/main/scala/fs2/data/xml/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,24 @@ package object xml {

/** Transforms a stream of characters into a stream of XML events.
* Emitted tokens are guaranteed to be valid up to that point.
* If the streams ends without failure, the sequence of tokens is sensured
* If the streams ends without failure, the sequence of tokens is ensured
* to represent a (potentially empty) sequence of valid XML documents.
*/
@deprecated(message = "Use `fs2.data.xml.events()` instead.", since = "fs2-data 1.4.0")
def events[F[_], T](implicit F: RaiseThrowable[F], T: CharLikeChunks[F, T]): Pipe[F, T, XmlEvent] =
EventParser.pipe[F, T]
EventParser.pipe[F, T](false)

/** Transforms a stream of characters into a stream of XML events.
* Emitted tokens are guaranteed to be valid up to that point.
* If the streams ends without failure, the sequence of tokens is ensured
* to represent a (potentially empty) sequence of valid XML documents.
*
* if `includeComments` is `true`, then comment events will be emitted
* together with the comment content.
*/
def events[F[_], T](
includeComments: Boolean = false)(implicit F: RaiseThrowable[F], T: CharLikeChunks[F, T]): Pipe[F, T, XmlEvent] =
EventParser.pipe[F, T](includeComments)

/** Resolves the character and entity references in the XML stream.
* Entities are already defined and validated (especially no recursion),
Expand Down
58 changes: 54 additions & 4 deletions xml/src/test/scala/fs2/data/xml/EventParserTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ object EventParserTest extends SimpleIOSuite {
Stream
.emits(input)
.covary[IO]
.through(events)
.through(events())
.compile
.toList
.map(events =>
Expand All @@ -60,7 +60,7 @@ object EventParserTest extends SimpleIOSuite {
Stream
.emits("")
.covary[IO]
.through(events)
.through(events())
.compile
.toList
.map(events => expect(events.isEmpty))
Expand All @@ -71,7 +71,7 @@ object EventParserTest extends SimpleIOSuite {
Stream
.emits(input)
.covary[IO]
.through(events)
.through(events())
.compile
.toList
.map(events =>
Expand All @@ -92,6 +92,56 @@ object EventParserTest extends SimpleIOSuite {
)))
}

test("Comments should be ignored by default") {
val input = """<!-- some comment -->
|<a>
|<!-- and another one -->
|Text
|</a>""".stripMargin
Stream
.emits(input)
.covary[IO]
.through(events())
.compile
.toList
.map(events =>
expect(
events == List(
XmlEvent.StartDocument,
XmlEvent.StartTag(QName("a"), Nil, false),
XmlEvent.XmlString("\n", false),
XmlEvent.XmlString("\nText\n", false),
XmlEvent.EndTag(QName("a")),
XmlEvent.EndDocument
)))
}

test("Comments should be emitted if asked for") {
val input = """<!-- some comment -->
|<a>
|<!-- and another one -->
|Text
|</a>""".stripMargin
Stream
.emits(input)
.covary[IO]
.through(events(includeComments = true))
.compile
.toList
.map(events =>
expect(
events == List(
XmlEvent.StartDocument,
XmlEvent.Comment(" some comment "),
XmlEvent.StartTag(QName("a"), Nil, false),
XmlEvent.XmlString("\n", false),
XmlEvent.Comment(" and another one "),
XmlEvent.XmlString("\nText\n", false),
XmlEvent.EndTag(QName("a")),
XmlEvent.EndDocument
)))
}

val testFileDir = Path("xml/src/test/resources/xmlconf")
test("Standard test suite should pass") {
(Files[IO].walk(testFileDir.resolve("xmltest/valid")).filter(_.extName == ".xml") ++
Expand All @@ -102,7 +152,7 @@ object EventParserTest extends SimpleIOSuite {
.readAll(path, 1024, Flags.Read)
.through(fs2.text.utf8.decode)
.flatMap(Stream.emits(_))
.through(events)
.through(events())
.compile
.drain
.attempt
Expand Down
2 changes: 1 addition & 1 deletion xml/src/test/scala/fs2/data/xml/XmlExceptionSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ object XmlExceptionSpec extends SimpleIOSuite {

val input = """<a><b>c</a>"""

val stream = Stream.emit(input).through(events[Fallible, String]).attempt
val stream = Stream.emit(input).through(events[Fallible, String]()).attempt

expect(stream.compile.toList match {
case Right(
Expand Down