Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tree query compiler implementation #428

Merged
merged 11 commits into from
Jan 11, 2023
38 changes: 37 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -57,6 +57,13 @@ val commonSettings = List(
.toList
.flatten,
scalacOptions := scalacOptions.value.filterNot(_ == "-source:3.0-migration"),
scalacOptions ++= PartialFunction
.condOpt(CrossVersion.partialVersion(scalaVersion.value)) {
case Some((2, _)) => List("-Ypatmat-exhaust-depth", "40")
case _ => Nil
}
.toList
.flatten,
scalacOptions ++= PartialFunction
.condOpt(CrossVersion.partialVersion(scalaVersion.value)) {
case Some((2, 12)) =>
@@ -436,7 +443,36 @@ lazy val finiteState = crossProject(JVMPlatform, JSPlatform, NativePlatform)
.settings(
name := "fs2-data-finite-state",
description := "Streaming finite state machines",
tlVersionIntroduced := Map("3" -> "1.6.0", "2.13" -> "1.6.0", "2.12" -> "1.6.0")
tlVersionIntroduced := Map("3" -> "1.6.0", "2.13" -> "1.6.0", "2.12" -> "1.6.0"),
mimaBinaryIssueFilters ++= List(
// all filters related to esp.Rhs.Captured* come from converting it from case class to case object
ProblemFilters.exclude[MissingClassProblem]("fs2.data.esp.Rhs$CapturedLeaf"),
ProblemFilters.exclude[MissingTypesProblem]("fs2.data.esp.Rhs$CapturedLeaf$"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedLeaf.apply"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedLeaf.unapply"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.name"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.copy"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.esp.Rhs#CapturedTree.copy$default$1"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.copy$default$2"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.this"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree.apply"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.esp.Rhs#CapturedLeaf.fromProduct"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.esp.Rhs#CapturedTree._1"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.esp.Rhs#CapturedTree._2"),
ProblemFilters.exclude[ReversedMissingMethodProblem](
"fs2.data.mft.MFTBuilder#Guardable.fs2$data$mft$MFTBuilder$Guardable$$$outer"),
// rules now only have number of parameters
ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.apply"),
ProblemFilters.exclude[DirectMissingMethodProblem]("fs2.data.mft.Rules.params"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.copy"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.mft.Rules.copy$default$1"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.this"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("fs2.data.mft.Rules.apply"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("fs2.data.mft.Rules._1")
)
)
.jsSettings(
scalaJSLinkerConfig ~= (_.withModuleKind(ModuleKind.CommonJSModule))
)
.nativeSettings(
tlVersionIntroduced := Map("3" -> "1.5.1", "2.13" -> "1.5.1", "2.12" -> "1.5.1")
30 changes: 19 additions & 11 deletions finite-state/shared/src/main/scala/fs2/data/esp/ESP.scala
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@ import scala.annotation.tailrec
* catch all rules, no matter what the state or depth is.
*/
private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
val params: Map[Int, List[Int]],
val params: Map[Int, Int],
val rules: DecisionTree[Guard, Tag[InTag], Rhs[OutTag]])(implicit
F: RaiseThrowable[F]) {

@@ -40,9 +40,8 @@ private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
TT: Tag2Tag[InTag, OutTag],
G: Evaluator[Guard, Tag[InTag]]) =
params.get(q).liftTo[Pull[F, Nothing, *]](new ESPException(s"unknown state $q")).flatMap { params =>
if (params.size === args.size) {
args
.zip(params)
if (params === args.size) {
args.zipWithIndex
.foldLeftM(env) { case (env, (arg, param)) =>
step(env, arg, in).map { rhs =>
env.updated(param, rhs)
@@ -56,7 +55,7 @@ private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
}
} else {
Pull.raiseError(new ESPException(
s"wrong number of argument given in state $q reading input $in (expected ${params.size} but got ${args.size})"))
s"wrong number of argument given in state $q reading input $in (expected $params but got ${args.size})"))
}
}

@@ -101,7 +100,7 @@ private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
case Rhs.SelfCall(q, params) =>
params
.traverse(eval(env, depth, in, _))
.flatMap(params => call(env, q, 0, params, in))
.flatMap(call(env, q, 0, _, in))
case Rhs.Param(i) =>
env
.get(i)
@@ -111,7 +110,7 @@ private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
case Rhs.Tree(tag, inner) =>
eval(env, depth, in, inner)
.map(inner => Expr.Open(Out.makeOpen(tag), Expr.concat(inner, Expr.Close(Out.makeClose(tag), Expr.Epsilon))))
case Rhs.CapturedTree(_, inner) =>
case Rhs.CapturedTree(inner) =>
eval(env, depth, in, inner).flatMap { inner =>
in.flatMap(select(_, Selector.Cons(Selector.Root(), Tag.Open, 0)))
.liftTo[Pull[F, Nothing, *]](new ESPException("cannot capture eos"))
@@ -122,10 +121,18 @@ private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
}
case Rhs.Leaf(v) =>
Pull.pure(Expr.Leaf(Out.makeLeaf(v), Expr.Epsilon))
case Rhs.CapturedLeaf(_) =>
case Rhs.CapturedLeaf =>
in.flatMap(select(_, Selector.Cons(Selector.Root(), Tag.Leaf, 0)))
.liftTo[Pull[F, Nothing, *]](new ESPException("cannot capture eos"))
.map(v => Expr.Leaf(Out.makeLeaf(TT.convert(v)), Expr.Epsilon))
case Rhs.ApplyToLeaf(f: (OutTag => Either[String, OutTag]) @unchecked) =>
in.flatMap(select(_, Selector.Cons(Selector.Root(), Tag.Leaf, 0)))
.liftTo[Pull[F, Nothing, *]](new ESPException("cannot capture eos"))
.flatMap(v =>
f(TT.convert(v))
.leftMap(new ESPException(_))
.liftTo[Pull[F, Nothing, *]]
.map(t => Expr.Leaf(Out.makeLeaf(t), Expr.Epsilon)))
case Rhs.Concat(rhs1, rhs2) =>
(eval(env, depth, in, rhs1), eval(env, depth, in, rhs2)).mapN(Expr.concat(_, _))
}
@@ -181,8 +188,9 @@ private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
case None =>
step(env, e, none).map(squeezeAll(_)).flatMap { case (e, s) =>
e match {
case Expr.Epsilon => Pull.output(Chunk.seq(s))
case _ => Pull.raiseError(new ESPException(s"unexpected end of input $e"))
case Expr.Epsilon =>
Pull.output(Chunk.seq(s))
case _ => Pull.raiseError(new ESPException(s"unexpected end of input $e"))
}
}
}
@@ -197,6 +205,6 @@ private[data] class ESP[F[_], Guard, InTag, OutTag](init: Int,
Out: Conversion[OutTag, Out],
TT: Tag2Tag[InTag, OutTag],
G: Evaluator[Guard, Tag[InTag]]): Pipe[F, In, Out] =
transform(Chunk.empty, 0, _, Map.empty, Expr.Call(init, 0, Nil), new ListBuffer).stream
transform[In, Out](Chunk.empty, 0, _, Map.empty, Expr.Call(init, 0, Nil), new ListBuffer).stream

}
13 changes: 13 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/esp/Expr.scala
Original file line number Diff line number Diff line change
@@ -16,6 +16,10 @@

package fs2.data.esp

import cats.Show
import cats.syntax.foldable._
import cats.syntax.show._

sealed trait Expr[+Out]
object Expr {
case class Call[Out](q: Int, depth: Int, params: List[Expr[Out]]) extends Expr[Out]
@@ -34,4 +38,13 @@ object Expr {
case (Leaf(v, Epsilon), _) => Leaf(v, e2)
case (_, _) => Concat(e1, e2)
}

implicit def show[Out: Show]: Show[Expr[Out]] = Show.show {
case Call(q, d, ps) => show"q${q}_$d(${(ps: List[Expr[Out]]).mkString_(", ")})"
case Epsilon => ""
case Open(o, next) => show"$o $next"
case Close(c, next) => show"$c $next"
case Leaf(l, next) => show"$l $next"
case Concat(e1, e2) => show"$e1 $e2"
}
}
7 changes: 5 additions & 2 deletions finite-state/shared/src/main/scala/fs2/data/esp/Rhs.scala
Original file line number Diff line number Diff line change
@@ -41,13 +41,16 @@ object Rhs {
case class Tree[OutTag](tag: OutTag, inner: Rhs[OutTag]) extends Rhs[OutTag]

/** Builds a tree with the captured node tag in pattern. */
case class CapturedTree[OutTag](name: String, inner: Rhs[OutTag]) extends Rhs[OutTag]
case class CapturedTree[OutTag](inner: Rhs[OutTag]) extends Rhs[OutTag]

/** Emits a leaf value. */
case class Leaf[OutTag](value: OutTag) extends Rhs[OutTag]

/** Emits the captured input value. */
case class CapturedLeaf(name: String) extends Rhs[Nothing]
case object CapturedLeaf extends Rhs[Nothing]

/** Applies the function to a leaf value. */
case class ApplyToLeaf[OutTag](f: OutTag => Either[String, OutTag]) extends Rhs[OutTag]

/** Concatenates two RHS. */
case class Concat[OutTag](fst: Rhs[OutTag], snd: Rhs[OutTag]) extends Rhs[OutTag]
229 changes: 221 additions & 8 deletions finite-state/shared/src/main/scala/fs2/data/mft/MFT.scala
Original file line number Diff line number Diff line change
@@ -22,12 +22,20 @@ import esp.{Depth, ESP, Rhs => ERhs, Pattern, PatternDsl, Tag => ETag}

import cats.{Defer, MonadError}
import cats.syntax.all._
import cats.Show
import scala.annotation.tailrec

sealed trait Forest
object Forest {
  case object Self extends Forest
  case object First extends Forest
  case object Second extends Forest

  /** Renders a forest reference as a variable name:
    * `x0` for [[Self]], `x1` for [[First]] and `x2` for [[Second]].
    */
  implicit val show: Show[Forest] = new Show[Forest] {
    def show(forest: Forest): String =
      forest match {
        case Self   => "x0"
        case First  => "x1"
        case Second => "x2"
      }
  }
}

sealed trait EventSelector[Guard, InTag]
@@ -41,7 +49,11 @@ object EventSelector {

sealed trait Rhs[+OutTag] {
def ~[OutTag1 >: OutTag](that: Rhs[OutTag1]): Rhs[OutTag1] =
Rhs.Concat(this, that)
(this, that) match {
case (Rhs.Epsilon, _) => that
case (_, Rhs.Epsilon) => this
case (_, _) => Rhs.Concat(this, that)
}
}
object Rhs {
case class Call[OutTag](q: Int, x: Forest, parameters: List[Rhs[OutTag]]) extends Rhs[OutTag]
@@ -51,7 +63,22 @@ object Rhs {
case class CopyNode[OutTag](children: Rhs[OutTag]) extends Rhs[OutTag]
case class Leaf[OutTag](value: OutTag) extends Rhs[OutTag]
case object CopyLeaf extends Rhs[Nothing]
case class ApplyToLeaf[OutTag](f: OutTag => Either[String, OutTag]) extends Rhs[OutTag]
case class Concat[OutTag](fst: Rhs[OutTag], snd: Rhs[OutTag]) extends Rhs[OutTag]

implicit def show[O: Show]: Show[Rhs[O]] =
Show.show {
case Call(q, x, Nil) => show"q$q($x)"
case Call(q, x, ps) => show"q$q($x${(ps: List[Rhs[O]]).mkString_(", ", ", ", "")})"
case Epsilon => ""
case Param(i) => show"y$i"
case Node(tag, children) => show"<$tag>($children)"
case CopyNode(children) => show"%t($children)"
case Leaf(value) => show"<$value>"
case CopyLeaf => "%t"
case ApplyToLeaf(_) => "<leaf-function>"
case Concat(l, r) => show"$l $r"
}
}

/** A Macro Forest Transducer, as described in _Streamlining Functional XML Processing_.
@@ -60,7 +87,163 @@ object Rhs {
*
* An MFT is an intermediate structure towards a compiled [[fs2.data.esp.ESP Events Stream Processor]]
*/
private[data] class MFT[Guard, InTag, OutTag](init: Int, rules: Map[Int, Rules[Guard, InTag, OutTag]]) {
private[data] class MFT[Guard, InTag, OutTag](init: Int, val rules: Map[Int, Rules[Guard, InTag, OutTag]]) {

/** Returns an equivalent MFT in which states only carry the parameters
  * that actually contribute to the output.
  *
  * The set of used parameters is computed as a fixpoint: a parameter is used by a state
  * if it occurs directly in one of its right-hand sides (outside of any call argument),
  * or if it occurs directly in an argument passed at a position the called state uses.
  * Unused arguments are then dropped from every call and the remaining parameters
  * are renumbered.
  */
def removeUnusedParameters: MFT[Guard, InTag, OutTag] = {
  // parameters referenced directly in the rhs, i.e. not from within the arguments of a call
  def directParams(rhs: Rhs[OutTag]): Set[Int] =
    rhs match {
      case Rhs.Param(idx)       => Set(idx)
      case Rhs.Node(_, inner)   => directParams(inner)
      case Rhs.CopyNode(inner)  => directParams(inner)
      case Rhs.Concat(fst, snd) => directParams(fst) ++ directParams(snd)
      case _                    => Set.empty
    }

  // every call occurring in the rhs, including the ones nested in call arguments
  def callsIn(rhs: Rhs[OutTag]): List[Rhs.Call[OutTag]] =
    rhs match {
      case Rhs.Call(q, x, args) => Rhs.Call(q, x, args) :: args.flatMap(callsIn(_))
      case Rhs.Node(_, inner)   => callsIn(inner)
      case Rhs.CopyNode(inner)  => callsIn(inner)
      case Rhs.Concat(fst, snd) => callsIn(fst) ++ callsIn(snd)
      case _                    => Nil
    }

  // seed of the fixpoint: the parameters each state references directly
  val directlyUsed =
    rules.fmap { case Rules(_, cases) =>
      cases.map { case (_, rhs) => directParams(rhs) }.combineAll
    }

  // grows the used sets until passing over all the calls adds nothing new
  @tailrec
  def saturate(known: Map[Int, Set[Int]]): Map[Int, Set[Int]] = {
    val extended = known.combine(rules.fmap { case Rules(_, cases) =>
      cases.flatMap { case (_, rhs) =>
        callsIn(rhs).flatMap { case Rhs.Call(callee, _, args) =>
          val calleeUses = known.getOrElse(callee, Set())
          // only arguments at positions the callee uses make caller parameters used
          args.zipWithIndex.collect {
            case (arg, pos) if calleeUses.contains(pos) =>
              directParams(arg)
          }
        }
      }.combineAll
    })
    if (extended == known)
      known
    else
      saturate(extended)
  }

  val used = saturate(directlyUsed)

  // removes the arguments at unused positions and renumbers the parameters that are kept
  def prune(rhs: Rhs[OutTag], kept: Set[Int]): Rhs[OutTag] =
    rhs match {
      case Rhs.Call(q, x, args) =>
        val keptArgs =
          args.zipWithIndex
            .collect {
              case (arg, pos) if used.getOrElse(q, Set.empty).contains(pos) =>
                prune(arg, kept)
            }
        Rhs.Call(q, x, keptArgs)
      case Rhs.Node(tag, inner) => Rhs.Node(tag, prune(inner, kept))
      case Rhs.CopyNode(inner)  => Rhs.CopyNode(prune(inner, kept))
      case Rhs.Concat(fst, snd) => Rhs.Concat(prune(fst, kept), prune(snd, kept))
      // the new index is the number of kept parameters that precede this one
      case Rhs.Param(idx) => Rhs.Param(kept.count(_ < idx))
      case _              => rhs
    }

  val prunedRules = rules.map2(used) { case (Rules(_, cases), kept) =>
    Rules(kept.size, cases.map { case (selector, rhs) => (selector, prune(rhs, kept)) })
  }

  new MFT(init, prunedRules)
}

/** Returns an MFT that has the same behavior but with stay moves inlined when possible.
  *
  * A state is a ''stay state'' when its rules are wildcard rules (see `Rules.isWildcard`)
  * and their common right-hand side only ever calls states on the current forest,
  * including within call arguments. A call to a stay state is replaced by that
  * right-hand side, in which calls are retargeted to the forest of the inlined call
  * and parameters are replaced by the call arguments.
  */
def inlineStayMoves: MFT[Guard, InTag, OutTag] = {
  // first we gather all the stay states, for which the RHS is only calling other states on self
  // and is the same for all cases.
  def hasOnlySelfCalls(rhs: Rhs[OutTag]): Boolean =
    rhs match {
      // inlining retargets the calls nested in arguments as well (see `subst`),
      // so the arguments must only contain calls on self too
      case Rhs.Call(_, Forest.Self, args) => args.forall(hasOnlySelfCalls(_))
      case Rhs.Call(_, _, _)              => false
      case Rhs.Node(_, children)          => hasOnlySelfCalls(children)
      case Rhs.CopyNode(children)         => hasOnlySelfCalls(children)
      case Rhs.Concat(rhs1, rhs2)         => hasOnlySelfCalls(rhs1) && hasOnlySelfCalls(rhs2)
      case _                              => true
    }

  // maps each stay state to the RHS shared by all its rules
  val stayStates = rules.mapFilter { rules =>
    if (rules.isWildcard)
      rules.tree.headOption.collect { case (_, rhs) if hasOnlySelfCalls(rhs) => rhs }
    else
      none
  }

  // instantiates the RHS of a stay state at a call site: calls are moved to forest `x`
  // and parameters are replaced by the actual arguments (epsilon if none is provided)
  def subst(rhs: Rhs[OutTag], x: Forest, args: List[Rhs[OutTag]]): Rhs[OutTag] =
    rhs match {
      case Rhs.Call(q, _, args1)  => Rhs.Call(q, x, args1.map(subst(_, x, args)))
      case Rhs.Param(i)           => args.lift(i).getOrElse(Rhs.Epsilon)
      case Rhs.Node(t, children)  => Rhs.Node(t, subst(children, x, args))
      case Rhs.CopyNode(children) => Rhs.CopyNode(subst(children, x, args))
      case Rhs.Concat(rhs1, rhs2) => Rhs.Concat(subst(rhs1, x, args), subst(rhs2, x, args))
      case _                      => rhs
    }

  // replaces calls to stay states by their instantiated RHS
  def inlineStayCalls(rhs: Rhs[OutTag]): Rhs[OutTag] =
    rhs match {
      case Rhs.Call(q, x, args) =>
        stayStates.get(q) match {
          case Some(rhs) =>
            subst(rhs, x, args.map(inlineStayCalls(_)))
          case None => rhs
        }
      case Rhs.Node(t, children)  => Rhs.Node(t, inlineStayCalls(children))
      case Rhs.CopyNode(children) => Rhs.CopyNode(inlineStayCalls(children))
      case Rhs.Concat(rhs1, rhs2) => Rhs.Concat(inlineStayCalls(rhs1), inlineStayCalls(rhs2))
      case _                      => rhs
    }

  val rules1 = rules.fmap { case Rules(nparams, rhss) =>
    Rules(nparams,
          rhss.map { case (sel, rhs) =>
            (sel, inlineStayCalls(rhs))
          })
  }

  new MFT(init, rules1)
}

/** Returns an MFT that has the same behavior but in which only the states
  * that can be called, directly or transitively, from the initial state are kept.
  */
def removeUnreachableStates: MFT[Guard, InTag, OutTag] = {
  // all the states called in the rhs, including from within call arguments
  def statesCalledIn(rhs: Rhs[OutTag]): List[Int] =
    rhs match {
      case Rhs.Call(target, _, args) => target :: args.flatMap(statesCalledIn(_))
      case Rhs.Node(_, inner)        => statesCalledIn(inner)
      case Rhs.CopyNode(inner)       => statesCalledIn(inner)
      case Rhs.Concat(fst, snd)      => statesCalledIn(fst) ++ statesCalledIn(snd)
      case _                         => Nil
    }

  // the states called by any rule of `q` (none if `q` has no rules)
  def successors(q: Int): List[Int] =
    rules.get(q) match {
      case Some(qRules) => qRules.tree.flatMap { case (_, rhs) => statesCalledIn(rhs) }
      case None         => Nil
    }

  // worklist traversal of the call graph, skipping the states that were already visited
  @tailrec
  def visit(pending: List[Int], seen: Set[Int]): Set[Int] =
    pending match {
      case Nil => seen
      case q :: rest =>
        if (seen.contains(q))
          visit(rest, seen)
        else
          visit(successors(q) ++ rest, seen + q)
    }

  val live = visit(init :: Nil, Set.empty)

  new MFT(init, rules.filter { case (q, _) => live.contains(q) })
}

/** Compiles this MFT into an ESP.
* The generated ESP contains one decision tree encoding all the patterns
@@ -79,9 +262,10 @@ private[data] class MFT[Guard, InTag, OutTag](init: Int, rules: Map[Int, Rules[G
case Rhs.Param(i) => ERhs.Param(i)
case Rhs.Epsilon => ERhs.Epsilon
case Rhs.Node(tag, inner) => ERhs.Tree(tag, translateRhs(inner))
case Rhs.CopyNode(inner) => ERhs.CapturedTree("in", translateRhs(inner))
case Rhs.CopyNode(inner) => ERhs.CapturedTree(translateRhs(inner))
case Rhs.Leaf(v) => ERhs.Leaf(v)
case Rhs.CopyLeaf => ERhs.CapturedLeaf("in")
case Rhs.CopyLeaf => ERhs.CapturedLeaf
case Rhs.ApplyToLeaf(f) => ERhs.ApplyToLeaf(f)
case Rhs.Concat(rhs1, rhs2) => ERhs.Concat(translateRhs(rhs1), translateRhs(rhs2))
}

@@ -107,18 +291,47 @@ private[data] class MFT[Guard, InTag, OutTag](init: Int, rules: Map[Int, Rules[G
val dflt = translateRhs(rhs)
List(state(q, 0)(close) -> dflt, state(q)(eos) -> dflt)
} ++ List(
state(q)(open) -> ERhs.Call(q, Depth.Increment, params.map(ERhs.Param(_))),
state(q)(open) -> ERhs.Call(q, Depth.Increment, List.tabulate(params)(ERhs.Param(_))),
state(q, 0)(close) -> ERhs.Epsilon,
state(q)(close) -> ERhs.Call(q, Depth.Decrement, params.map(ERhs.Param(_))),
state(q)(value) -> ERhs.Call(q, Depth.Copy, params.map(ERhs.Param(_))),
state(q)(close) -> ERhs.Call(q, Depth.Decrement, List.tabulate(params)(ERhs.Param(_))),
state(q)(value) -> ERhs.Call(q, Depth.Copy, List.tabulate(params)(ERhs.Param(_))),
state(q)(eos) -> ERhs.Epsilon
)
}

val compiler =
new pattern.Compiler[F, Guard, ETag[InTag], Pattern[Guard, InTag], ERhs[OutTag]]

compiler.compile(cases).map(new ESP(init, rules.fmap(_.params), _))
compiler.compile(cases).map(new ESP(init, rules.fmap(_.nparams), _))
}

}

object MFT {

  /** Pretty-prints the rules of an MFT.
    *
    * States are printed sorted by identifier and separated by an empty line;
    * each rule of a state is printed on its own line as `q<state><selector> -> <rhs>`.
    * A selector is rendered between parentheses as the matched event followed by
    * the state parameters `y0`, `y1`, ..., and by ` when <guard>` if it is guarded.
    */
  implicit def show[G: Show, I: Show, O: Show]: Show[MFT[G, I, O]] = Show.show { mft =>
    mft.rules.toList
      .sortBy(_._1)
      .map { case (src, rules) =>
        // parameters come after the matched event, hence the leading separator
        val params =
          if (rules.nparams == 0)
            ""
          else
            List.tabulate(rules.nparams)(i => s"y$i").mkString(", ", ", ", "")
        implicit val showSelector: Show[EventSelector[G, I]] = Show.show {
          case EventSelector.AnyNode(g) => show"(<%t>$params)${g.fold("")(g => show" when $g")}"
          case EventSelector.Node(t, g) => show"(<$t>$params)${g.fold("")(g => show" when $g")}"
          case EventSelector.AnyLeaf(g) => show"(<%t />$params)${g.fold("")(g => show" when $g")}"
          case EventSelector.Leaf(t, g) => show"(<$t />$params)${g.fold("")(g => show" when $g")}"
          // rendered with balanced parentheses, like the other selectors
          case EventSelector.Epsilon() => show"(ε$params)"
        }
        rules.tree
          .map { case (pat, rhs) =>
            show"q$src$pat -> $rhs"
          }
          .mkString_("\n")
      }
      .mkString_("\n\n")
  }

}
Original file line number Diff line number Diff line change
@@ -37,6 +37,8 @@ class MFTBuilder[Guard, InTag, OutTag] private[mft] {
sealed trait PatternBuilder
sealed trait Guardable extends PatternBuilder {
def when(guard: Guard): PatternBuilder
/** Guards this pattern if a guard is provided, otherwise returns this builder unchanged. */
def when(guard: Option[Guard]): PatternBuilder =
  guard match {
    case Some(g) => when(g)
    case None    => this
  }
}
private[mft] object PatternBuilder {
case class Any(guard: Option[Guard]) extends Guardable {
@@ -76,6 +78,6 @@ class MFTBuilder[Guard, InTag, OutTag] private[mft] {
}

def build: MFT[Guard, InTag, OutTag] =
new MFT(initial, states.map { st => st.q -> Rules(List.range(0, st.nargs), st.rules.result()) }.toMap)
new MFT(initial, states.map { st => st.q -> Rules(st.nargs, st.rules.result()) }.toMap)

}
Original file line number Diff line number Diff line change
@@ -16,4 +16,9 @@

package fs2.data.mft

case class Rules[Guard, InTag, OutTag](params: List[Int], tree: List[(EventSelector[Guard, InTag], Rhs[OutTag])])
/** The rules of a state.
  *
  * @param nparams the number of parameters of the state
  * @param tree the cases of the state, as pairs of an event selector and the associated right-hand side
  */
case class Rules[Guard, InTag, OutTag](nparams: Int, tree: List[(EventSelector[Guard, InTag], Rhs[OutTag])]) {

  /** Whether the selectors of these rules are exactly the unguarded any-leaf,
    * any-node and epsilon selectors, all with the same right-hand side.
    */
  def isWildcard: Boolean = {
    val (selectors, rhss) = tree.unzip
    selectors.toSet == Set(EventSelector.AnyLeaf(None), EventSelector.AnyNode(None), EventSelector.Epsilon()) &&
    rhss.toSet.size == 1
  }
}
Original file line number Diff line number Diff line change
@@ -79,4 +79,7 @@ package object mft {
def copy: Rhs[Nothing] =
Rhs.CopyLeaf

/** Builds a right-hand side that applies `f` to a leaf value.
  *
  * `f` returns either the resulting value or an error message.
  */
def applyToLeaf[OutTag](f: OutTag => Either[String, OutTag]): Rhs.ApplyToLeaf[OutTag] =
  Rhs.ApplyToLeaf(f)

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright 2022 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.mft.query

import cats.data.NonEmptyList

/** An abstract representation of a query language that consists of
  * nested for loops over some paths and tagged element construction.
  *
  * ''Note for implementers'': a path is always relative to the closest enclosing `for` clause.
  */
sealed trait Query[Tag, Path]
object Query {
case class Empty[Tag, Path]() extends Query[Tag, Path]
case class ForClause[Tag, Path](variable: String, source: Path, result: Query[Tag, Path]) extends Query[Tag, Path]
case class LetClause[Tag, Path](variable: String, query: Query[Tag, Path], result: Query[Tag, Path])
extends Query[Tag, Path]
case class Ordpath[Tag, Path](path: Path) extends Query[Tag, Path]
case class Variable[Tag, Path](name: String) extends Query[Tag, Path]
case class Node[Tag, Path](tag: Tag, child: Query[Tag, Path]) extends Query[Tag, Path]
case class Leaf[Tag, Path](tag: Tag) extends Query[Tag, Path]
case class Sequence[Tag, Path](elements: NonEmptyList[Query[Tag, Path]]) extends Query[Tag, Path]
case class LeafFunction[Tag, Path](f: Tag => Either[String, Tag]) extends Query[Tag, Path]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
/*
* Copyright 2022 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data
package mft
package query

import pfsa.{Candidate, Pred, Regular}
import cats.Eq
import cats.syntax.all._
import cats.data.NonEmptyList

/** This compiler can be used to compile to an MFT any query language that can be represented by nested for loops.
*
* The compiler is based on the approach described in [[https://doi.org/10.1109/ICDE.2014.6816714 _XQuery Streaming by Forest Transducers_]]
* and generalized for the abstract query language on trees.
*/
private[fs2] abstract class QueryCompiler[Tag, Path] {

type Matcher
type Pattern
type Guard

/** A single char to be matched in a path */
type Char

implicit def predicate: Pred[Matcher, Char]

implicit def candidate: Candidate[Matcher, Char]

implicit def charsEq: Eq[Matcher]

/** Creates a regular expression given a path. */
def path2regular(path: Path): Regular[Matcher]

/** Create (ordered) pattern matching cases with guards for a given matcher.
*
* Guard is expressed as a conjunction of atomic guard operations.
* If a case has no guard, returns an empty list for this case.
*
* Cases are matched in order; the first matching case is taken.
*/
def cases(matcher: Matcher): List[(Pattern, List[Guard])]

/** Return the constructor tag of this pattern, or `None` if it is a wildcard. */
def tagOf(pattern: Pattern): Option[Tag]

def compile(query: Query[Tag, Path]): MFT[NonEmptyList[Guard], Tag, Tag] = {
val mft = dsl[NonEmptyList[Guard], Tag, Tag] { implicit builder =>
val q0 = state(args = 0, initial = true)
val qinit = state(args = 1)
val qcopy = state(args = 0)

qcopy(anyNode) -> copy(qcopy(x1)) ~ qcopy(x2)
qcopy(anyLeaf) -> copy ~ qcopy(x1)
qcopy(epsilon) -> eps

// input is copied in the first argument
q0(any) -> qinit(x0, qcopy(x0))

def translatePath(path: Path, start: builder.StateBuilder, end: builder.StateBuilder): Unit = {
val regular = path2regular(path)
val dfa = regular.deriveDFA
// resolve transitions into patterns and guards
val transitionCases =
dfa.transitions.toList.zipWithIndex.map { case (transitions, src) =>
(src,
transitions.flatMap { case (cond, tgt) =>
cases(cond).map { case (pat, guard) =>
(pat, NonEmptyList.fromList(guard), tgt)
}
})
}.toMap
// we can apply the DFA to MFT translation now
transitionCases.foldLeft(Map(dfa.init -> start)) { case (states, (src, transitions)) =>
val initialSrc = src === dfa.init
val (q1, states1) =
states.get(src) match {
case Some(q1) => (q1, states)
case None =>
val q1 =
if (initialSrc)
start
else
state(args = start.nargs)
(q1, states.updated(src, q1))
}
val copyArgs = List.tabulate(q1.nargs)(y(_))
val states2 =
transitions.foldLeft(states1) { case (states, (pattern, guard, tgt)) =>
val finalTgt = dfa.finals.contains(tgt)
val (q2, states1) =
states.get(tgt) match {
case Some(q2) => (q2, states)
case None =>
val q2 = state(args = q1.nargs)
(q2, states.updated(tgt, q2))
}
val pat: builder.Guardable = tagOf(pattern).fold(anyNode)(aNode(_))
if (!finalTgt) {
q1(pat.when(guard)) -> q2(x1, copyArgs: _*) ~ q1(x2, copyArgs: _*)
} else {
q1(pat.when(guard)) -> end(x1, (copyArgs :+ copy(qcopy(x1))): _*) ~ q2(x1, copyArgs: _*) ~
q1(x2, copyArgs: _*)
}
states1
}
q1(anyLeaf) -> eps
q1(epsilon) -> eps
states2
}: Unit
}

def translate(query: Query[Tag, Path], vars: List[String], q: builder.StateBuilder): Unit =
query match {
case Query.Empty() =>
q(any) -> eps

case Query.ForClause(variable, source, result) =>
val q1 = state(args = q.nargs + 1)

// compile the variable binding path
translatePath(source, q, q1)

// then the body with the bound variable
translate(result, variable :: vars, q1)

case Query.LetClause(variable, query, result) =>
val qv = state(args = q.nargs)
val q1 = state(args = q.nargs + 1)

// compile the variable binding query
translate(query, vars, qv)
// then the body with the bound variable
translate(result, variable :: vars, q1)

// bind everything
val copyArgs = List.tabulate(q.nargs)(y(_))
q(any) -> q1(x0, (copyArgs :+ qv(x0, copyArgs: _*)): _*)

case Query.Ordpath(path) =>
val q1 = state(args = q.nargs + 1)

// compile the path
translatePath(path, q, q1)

// emit the result
q1(any) -> y(q.nargs)
case Query.Node(tag, child) =>
val q1 = state(args = q.nargs)

// translate the child query
translate(child, vars, q1)

// bind it
val copyArgs = List.tabulate(q.nargs)(y(_))
q(any) -> node(tag)(q1(x0, copyArgs: _*))

case Query.Leaf(tag) =>
// just emit it
q(any) -> leaf(tag)

case Query.Variable(name) =>
// variable names are pushed on top of the list, so indexing is reversed
q(any) -> y(vars.size - 1 - vars.indexOf(name))

case Query.Sequence(queries) =>
val copyArgs = List.tabulate(q.nargs)(y(_))

// compile and sequence every query in the sequence
val rhs =
queries.foldLeft[Rhs[Tag]](eps) { (acc, query) =>
val q1 = state(args = q.nargs)

// translate the query
translate(query, vars, q1)

acc ~ q1(x0, copyArgs: _*)
}

// emit rhs for any input
q(any) -> rhs

case Query.LeafFunction(f) =>
q(anyLeaf) -> applyToLeaf(f)
}

translate(query, List("$input"), qinit)
}
// apply some optimizations until nothing changes or credit is exhausted
def optimize(mft: MFT[NonEmptyList[Guard], Tag, Tag], credit: Int): MFT[NonEmptyList[Guard], Tag, Tag] =
if (credit > 0) {
val mft1 = mft.removeUnusedParameters.inlineStayMoves.removeUnreachableStates
if (mft1.rules == mft.rules)
mft
else
optimize(mft1, credit - 1)
} else {
mft
}
optimize(mft, 50)
}

}
Original file line number Diff line number Diff line change
@@ -206,12 +206,12 @@ class Compiler[F[_], Expr, Tag, Pat, Out](implicit
if (nonTrivialIdx >= 0) {
// there are non trivially true guards after the first one
specialize(col, Pat.trueTag, Nil, matrix.take(1 + nonTrivialIdx)).map { smat =>
(g, Map(Pat.trueTag -> ((false, Nil, smat))), (false, matrix.drop(1 + nonTrivialIdx)).some)
(g, Map(Pat.trueTag -> ((false, Nil, smat))), (true, matrix.drop(1 + nonTrivialIdx)).some)
}
} else {
// only trivially false guard afterwards
specialize(col, Pat.trueTag, Nil, matrix).map { smat =>
(g, Map(Pat.trueTag -> ((false, Nil, smat))), (false, defaultMatrix(col, matrix)).some)
(g, Map(Pat.trueTag -> ((false, Nil, smat))), (true, defaultMatrix(col, matrix)).some)
}
}
}
Original file line number Diff line number Diff line change
@@ -16,6 +16,8 @@

package fs2.data.pattern

import cats.Show

/** Describes the structure of an expression in term of constructor
* trees that can be selected.
*/
@@ -42,6 +44,9 @@ object Evaluator {
* To be used when a pattern language has no guards.
*/
sealed trait NoGuard
object NoGuard {
implicit val show: Show[NoGuard] = Show.show(_ => "")
}

object Selectable {
def apply[Expr, Tag](implicit ev: Selectable[Expr, Tag]): Selectable[Expr, Tag] =
Original file line number Diff line number Diff line change
@@ -14,12 +14,12 @@
* limitations under the License.
*/

package fs2.data.pattern

import fs2.data.esp.Tag
import fs2.data.esp.Conversion
package fs2.data

import cats.Eq
import fs2.data.esp.{Conversion, Tag}
import fs2.data.pattern.{ConstructorTree, Selectable}
import cats.Show

sealed trait MiniXML
object MiniXML {
@@ -56,4 +56,13 @@ object MiniXML {

}

implicit object MiniXMLShow extends Show[MiniXML] {
def show(node: MiniXML): String =
node match {
case Open(name) => s"<$name>"
case Close(name) => s"</$name>"
case Text(t) => t
}
}

}
27 changes: 27 additions & 0 deletions finite-state/shared/src/test/scala/fs2/data/MiniXPath.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2022 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data

import cats.data.NonEmptyList

case class MiniXPath(steps: NonEmptyList[Step])

sealed trait Step
object Step {
case class Child(name: Option[String]) extends Step
case class Descendant(name: Option[String]) extends Step
}
761 changes: 761 additions & 0 deletions finite-state/shared/src/test/scala/fs2/data/mft/QuerySpec.scala

Large diffs are not rendered by default.