Skip to content

Commit

Permalink
Adding a multinomial dataset for testing multinomial xgboost classifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
lucagiovagnoli committed Feb 14, 2020
1 parent 4c31d3e commit e7877bd
Show file tree
Hide file tree
Showing 5 changed files with 278 additions and 59 deletions.
150 changes: 150 additions & 0 deletions mleap-xgboost-runtime/src/test/resources/datasources/iris.scale.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
0 1:-0.555556 2:0.25 3:-0.864407 4:-0.916667
0 1:-0.666667 2:-0.166667 3:-0.864407 4:-0.916667
0 1:-0.777778 3:-0.898305 4:-0.916667
0 1:-0.833333 2:-0.0833334 3:-0.830508 4:-0.916667
0 1:-0.611111 2:0.333333 3:-0.864407 4:-0.916667
0 1:-0.388889 2:0.583333 3:-0.762712 4:-0.75
0 1:-0.833333 2:0.166667 3:-0.864407 4:-0.833333
0 1:-0.611111 2:0.166667 3:-0.830508 4:-0.916667
0 1:-0.944444 2:-0.25 3:-0.864407 4:-0.916667
0 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1
0 1:-0.388889 2:0.416667 3:-0.830508 4:-0.916667
0 1:-0.722222 2:0.166667 3:-0.79661 4:-0.916667
0 1:-0.722222 2:-0.166667 3:-0.864407 4:-1
0 1:-1 2:-0.166667 3:-0.966102 4:-1
0 1:-0.166667 2:0.666667 3:-0.932203 4:-0.916667
0 1:-0.222222 2:1 3:-0.830508 4:-0.75
0 1:-0.388889 2:0.583333 3:-0.898305 4:-0.75
0 1:-0.555556 2:0.25 3:-0.864407 4:-0.833333
0 1:-0.222222 2:0.5 3:-0.762712 4:-0.833333
0 1:-0.555556 2:0.5 3:-0.830508 4:-0.833333
0 1:-0.388889 2:0.166667 3:-0.762712 4:-0.916667
0 1:-0.555556 2:0.416667 3:-0.830508 4:-0.75
0 1:-0.833333 2:0.333333 3:-1 4:-0.916667
0 1:-0.555556 2:0.0833333 3:-0.762712 4:-0.666667
0 1:-0.722222 2:0.166667 3:-0.694915 4:-0.916667
0 1:-0.611111 2:-0.166667 3:-0.79661 4:-0.916667
0 1:-0.611111 2:0.166667 3:-0.79661 4:-0.75
0 1:-0.5 2:0.25 3:-0.830508 4:-0.916667
0 1:-0.5 2:0.166667 3:-0.864407 4:-0.916667
0 1:-0.777778 3:-0.79661 4:-0.916667
0 1:-0.722222 2:-0.0833334 3:-0.79661 4:-0.916667
0 1:-0.388889 2:0.166667 3:-0.830508 4:-0.75
0 1:-0.5 2:0.75 3:-0.830508 4:-1
0 1:-0.333333 2:0.833333 3:-0.864407 4:-0.916667
0 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1
0 1:-0.611111 3:-0.932203 4:-0.916667
0 1:-0.333333 2:0.25 3:-0.898305 4:-0.916667
0 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1
0 1:-0.944444 2:-0.166667 3:-0.898305 4:-0.916667
0 1:-0.555556 2:0.166667 3:-0.830508 4:-0.916667
0 1:-0.611111 2:0.25 3:-0.898305 4:-0.833333
0 1:-0.888889 2:-0.75 3:-0.898305 4:-0.833333
0 1:-0.944444 3:-0.898305 4:-0.916667
0 1:-0.611111 2:0.25 3:-0.79661 4:-0.583333
0 1:-0.555556 2:0.5 3:-0.694915 4:-0.75
0 1:-0.722222 2:-0.166667 3:-0.864407 4:-0.833333
0 1:-0.555556 2:0.5 3:-0.79661 4:-0.916667
0 1:-0.833333 3:-0.864407 4:-0.916667
0 1:-0.444444 2:0.416667 3:-0.830508 4:-0.916667
0 1:-0.611111 2:0.0833333 3:-0.864407 4:-0.916667
1 1:0.5 3:0.254237 4:0.0833333
1 1:0.166667 3:0.186441 4:0.166667
1 1:0.444444 2:-0.0833334 3:0.322034 4:0.166667
1 1:-0.333333 2:-0.75 3:0.0169491 4:-4.03573e-08
1 1:0.222222 2:-0.333333 3:0.220339 4:0.166667
1 1:-0.222222 2:-0.333333 3:0.186441 4:-4.03573e-08
1 1:0.111111 2:0.0833333 3:0.254237 4:0.25
1 1:-0.666667 2:-0.666667 3:-0.220339 4:-0.25
1 1:0.277778 2:-0.25 3:0.220339 4:-4.03573e-08
1 1:-0.5 2:-0.416667 3:-0.0169491 4:0.0833333
1 1:-0.611111 2:-1 3:-0.152542 4:-0.25
1 1:-0.111111 2:-0.166667 3:0.0847457 4:0.166667
1 1:-0.0555556 2:-0.833333 3:0.0169491 4:-0.25
1 1:-1.32455e-07 2:-0.25 3:0.254237 4:0.0833333
1 1:-0.277778 2:-0.25 3:-0.118644 4:-4.03573e-08
1 1:0.333333 2:-0.0833334 3:0.152542 4:0.0833333
1 1:-0.277778 2:-0.166667 3:0.186441 4:0.166667
1 1:-0.166667 2:-0.416667 3:0.0508474 4:-0.25
1 1:0.0555554 2:-0.833333 3:0.186441 4:0.166667
1 1:-0.277778 2:-0.583333 3:-0.0169491 4:-0.166667
1 1:-0.111111 3:0.288136 4:0.416667
1 1:-1.32455e-07 2:-0.333333 3:0.0169491 4:-4.03573e-08
1 1:0.111111 2:-0.583333 3:0.322034 4:0.166667
1 1:-1.32455e-07 2:-0.333333 3:0.254237 4:-0.0833333
1 1:0.166667 2:-0.25 3:0.118644 4:-4.03573e-08
1 1:0.277778 2:-0.166667 3:0.152542 4:0.0833333
1 1:0.388889 2:-0.333333 3:0.288136 4:0.0833333
1 1:0.333333 2:-0.166667 3:0.355932 4:0.333333
1 1:-0.0555556 2:-0.25 3:0.186441 4:0.166667
1 1:-0.222222 2:-0.5 3:-0.152542 4:-0.25
1 1:-0.333333 2:-0.666667 3:-0.0508475 4:-0.166667
1 1:-0.333333 2:-0.666667 3:-0.0847458 4:-0.25
1 1:-0.166667 2:-0.416667 3:-0.0169491 4:-0.0833333
1 1:-0.0555556 2:-0.416667 3:0.38983 4:0.25
1 1:-0.388889 2:-0.166667 3:0.186441 4:0.166667
1 1:-0.0555556 2:0.166667 3:0.186441 4:0.25
1 1:0.333333 2:-0.0833334 3:0.254237 4:0.166667
1 1:0.111111 2:-0.75 3:0.152542 4:-4.03573e-08
1 1:-0.277778 2:-0.166667 3:0.0508474 4:-4.03573e-08
1 1:-0.333333 2:-0.583333 3:0.0169491 4:-4.03573e-08
1 1:-0.333333 2:-0.5 3:0.152542 4:-0.0833333
1 1:-1.32455e-07 2:-0.166667 3:0.220339 4:0.0833333
1 1:-0.166667 2:-0.5 3:0.0169491 4:-0.0833333
1 1:-0.611111 2:-0.75 3:-0.220339 4:-0.25
1 1:-0.277778 2:-0.416667 3:0.0847457 4:-4.03573e-08
1 1:-0.222222 2:-0.166667 3:0.0847457 4:-0.0833333
1 1:-0.222222 2:-0.25 3:0.0847457 4:-4.03573e-08
1 1:0.0555554 2:-0.25 3:0.118644 4:-4.03573e-08
1 1:-0.555556 2:-0.583333 3:-0.322034 4:-0.166667
1 1:-0.222222 2:-0.333333 3:0.0508474 4:-4.03573e-08
2 1:0.111111 2:0.0833333 3:0.694915 4:1
2 1:-0.166667 2:-0.416667 3:0.38983 4:0.5
2 1:0.555555 2:-0.166667 3:0.661017 4:0.666667
2 1:0.111111 2:-0.25 3:0.559322 4:0.416667
2 1:0.222222 2:-0.166667 3:0.627119 4:0.75
2 1:0.833333 2:-0.166667 3:0.898305 4:0.666667
2 1:-0.666667 2:-0.583333 3:0.186441 4:0.333333
2 1:0.666667 2:-0.25 3:0.79661 4:0.416667
2 1:0.333333 2:-0.583333 3:0.627119 4:0.416667
2 1:0.611111 2:0.333333 3:0.728813 4:1
2 1:0.222222 3:0.38983 4:0.583333
2 1:0.166667 2:-0.416667 3:0.457627 4:0.5
2 1:0.388889 2:-0.166667 3:0.525424 4:0.666667
2 1:-0.222222 2:-0.583333 3:0.355932 4:0.583333
2 1:-0.166667 2:-0.333333 3:0.38983 4:0.916667
2 1:0.166667 3:0.457627 4:0.833333
2 1:0.222222 2:-0.166667 3:0.525424 4:0.416667
2 1:0.888889 2:0.5 3:0.932203 4:0.75
2 1:0.888889 2:-0.5 3:1 4:0.833333
2 1:-0.0555556 2:-0.833333 3:0.355932 4:0.166667
2 1:0.444444 3:0.59322 4:0.833333
2 1:-0.277778 2:-0.333333 3:0.322034 4:0.583333
2 1:0.888889 2:-0.333333 3:0.932203 4:0.583333
2 1:0.111111 2:-0.416667 3:0.322034 4:0.416667
2 1:0.333333 2:0.0833333 3:0.59322 4:0.666667
2 1:0.611111 3:0.694915 4:0.416667
2 1:0.0555554 2:-0.333333 3:0.288136 4:0.416667
2 1:-1.32455e-07 2:-0.166667 3:0.322034 4:0.416667
2 1:0.166667 2:-0.333333 3:0.559322 4:0.666667
2 1:0.611111 2:-0.166667 3:0.627119 4:0.25
2 1:0.722222 2:-0.333333 3:0.728813 4:0.5
2 1:1 2:0.5 3:0.830508 4:0.583333
2 1:0.166667 2:-0.333333 3:0.559322 4:0.75
2 1:0.111111 2:-0.333333 3:0.38983 4:0.166667
2 1:-1.32455e-07 2:-0.5 3:0.559322 4:0.0833333
2 1:0.888889 2:-0.166667 3:0.728813 4:0.833333
2 1:0.111111 2:0.166667 3:0.559322 4:0.916667
2 1:0.166667 2:-0.0833334 3:0.525424 4:0.416667
2 1:-0.0555556 2:-0.166667 3:0.288136 4:0.416667
2 1:0.444444 2:-0.0833334 3:0.491525 4:0.666667
2 1:0.333333 2:-0.0833334 3:0.559322 4:0.916667
2 1:0.444444 2:-0.0833334 3:0.38983 4:0.833333
2 1:-0.166667 2:-0.416667 3:0.38983 4:0.5
2 1:0.388889 3:0.661017 4:0.833333
2 1:0.333333 2:0.0833333 3:0.59322 4:1
2 1:0.333333 2:-0.166667 3:0.423729 4:0.833333
2 1:0.111111 2:-0.583333 3:0.355932 4:0.5
2 1:0.222222 2:-0.166667 3:0.423729 4:0.583333
2 1:0.0555554 2:0.166667 3:0.491525 4:0.833333
2 1:-0.111111 2:-0.166667 3:0.38983 4:0.416667
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package ml.combust.mleap.xgboost.runtime

import ml.combust.mleap.core.types.{BasicType, NodeShape, ScalarType, StructField, TensorType}
import ml.combust.mleap.core.types.{BasicType, NodeShape, ScalarType, StructField, StructType, TensorType}
import ml.combust.mleap.runtime.frame.{DefaultLeapFrame, Transformer}
import ml.combust.mleap.tensor.SparseTensor
import ml.combust.mleap.xgboost.runtime.testing.{BoosterUtils, BundleSerializationUtils, CachedDatasetUtils, FloatingPointApproximations}
Expand All @@ -15,41 +15,53 @@ class XGBoostClassificationModelParitySpec extends FunSpec
with BundleSerializationUtils
with FloatingPointApproximations {

// Builds the binary-classification MLeap transformer used by the parity tests:
// an XGBoostClassification named "xgboostSingleThread" whose shape exposes the
// "raw_prediction" and "probability" output columns and whose model wraps an
// XGBoostBinaryClassificationModel.
// NOTE(review): this listing is a rendered diff, so two variants of the signature
// and of the model-construction expression appear back to back. The variant taking
// `booster` as a parameter appears to be the pre-change one; the parameterless
// variant trains its own booster from `denseDataset`.
def createBoosterClassifier(booster: Booster): Transformer ={
def createBoosterClassifier: Transformer = {

// Parameterless variant: the booster is trained here rather than passed in.
val booster: Booster = trainBooster(denseDataset)

XGBoostClassification(
"xgboostSingleThread",
NodeShape.probabilisticClassifier(
rawPredictionCol = Some("raw_prediction"),
probabilityCol = Some("probability")),
XGBoostClassificationModel(XGBoostBinaryClassificationModel(booster, numFeatures, 0))
// Variant below derives the feature count from the LibSVM training leap frame.
XGBoostClassificationModel(
XGBoostBinaryClassificationModel(booster, numFeatures(leapFrameLibSVMtrain), 0))
)
}

// Builds the multinomial MLeap transformer used by the parity tests: an
// XGBoostClassification named "xgboostSingleThread" with "raw_prediction" and
// "probability" output columns, wrapping an XGBoostMultinomialClassificationModel.
// NOTE(review): rendered diff, so two signatures and two model-construction
// expressions appear. The variant taking `booster` with a hard-coded class count
// of 3 appears to be the pre-change one.
def createMultinomialBoosterClassifier(booster: Booster): Transformer ={
def createMultinomialBoosterClassifier: Transformer ={

// Parameterless variant: trains a multinomial booster on the multinomial dataset.
val booster: Booster = trainMultinomialBooster(denseMultinomialDataset)

XGBoostClassification(
"xgboostSingleThread",
NodeShape.probabilisticClassifier(
rawPredictionCol = Some("raw_prediction"),
probabilityCol = Some("probability")),
XGBoostClassificationModel(XGBoostMultinomialClassificationModel(booster, 3, numFeatures, 0))
// Variant below reads the class count from the "num_class" entry of
// xgboostMultinomialParams (cast to Int) and the feature count from the iris
// training leap frame, instead of hard-coding them.
XGBoostClassificationModel(
XGBoostMultinomialClassificationModel(
booster, xgboostMultinomialParams("num_class").asInstanceOf[Int], numFeatures(leapFrameIrisTrain), 0))
)
}

def equalityTestRowByRow(booster: Booster, mleapTransformer: Transformer) = {
def equalityTestRowByRow(
booster: Booster,
mleapTransformer: Transformer,
leapFrameDataset: DefaultLeapFrame) = {

import XgbConverters._

leapFrameLibSVMtest.dataset.foreach {
val featuresColumnIndex = leapFrameDataset.schema.indexOf("features").get

leapFrameDataset.dataset.foreach {
r=>
val mleapResult = mleapTransformer.transform(DefaultLeapFrame(mleapSchema.get, Seq(r))).get
val mleapResult = mleapTransformer.transform(DefaultLeapFrame(leapFrameDataset.schema, Seq(r))).get

val mleapPredictionColIndex = mleapResult.schema.indexOf("prediction").get
val mleapRawPredictionColIndex = mleapResult.schema.indexOf("raw_prediction").get
val mleapProbabilityColIndex = mleapResult.schema.indexOf("probability").get

val singleRowDMatrix = r(1).asInstanceOf[SparseTensor[Double]].asXGB
val singleRowDMatrix = r(featuresColumnIndex).asInstanceOf[SparseTensor[Double]].asXGB

val boosterResult = booster.predict(singleRowDMatrix, false, 0).head(0)

Expand All @@ -76,20 +88,59 @@ class XGBoostClassificationModelParitySpec extends FunSpec
}
}

it("Results between the XGBoost4j booster and the MLeap Transformer are the same") {
val booster = trainBooster(xgboostParams, denseDataset)
val xgboostTransformer = createBoosterClassifier(trainBooster(xgboostParams, denseDataset))
// Row-by-row parity check for multinomial models. For every row of
// `leapFrameDataset` it compares the MLeap transformer's output against the
// XGBoost4j booster's output for the same features:
//   - booster:          the booster used as the reference
//   - mleapTransformer: the MLeap transformer under test
//   - leapFrameDataset: frame supplying both the schema and the rows; it must
//                       contain a "features" column (looked up with `.get`)
// Fails via `assert` on the first row where prediction, probability or raw
// prediction disagree.
def equalityTestRowByRowMultinomial(
booster: Booster,
mleapTransformer: Transformer,
leapFrameDataset: DefaultLeapFrame) = {

// NOTE(review): the next two lines reference `xgboostTransformer`, which is not a
// parameter of this method; they appear to be removed lines left in by the
// rendered diff rather than part of this method.
val mleapBundle = serializeModelToMleapBundle(xgboostTransformer)
val deserializedTransformer: Transformer = loadMleapTransformerFromBundle(mleapBundle)
// Presumably supplies the `.asXGB` conversion used below; verify against XgbConverters.
import XgbConverters._

val featuresColumnIndex = leapFrameDataset.schema.indexOf("features").get

leapFrameDataset.dataset.foreach {
r =>
// Run the transformer on a one-row frame that reuses the dataset's schema.
val mleapResult = mleapTransformer.transform(DefaultLeapFrame(leapFrameDataset.schema, Seq(r))).get

val mleapPredictionColIndex = mleapResult.schema.indexOf("prediction").get
val mleapRawPredictionColIndex = mleapResult.schema.indexOf("raw_prediction").get
val mleapProbabilityColIndex = mleapResult.schema.indexOf("probability").get

// The features cell is cast to a sparse tensor of doubles and converted for the booster.
val singleRowDMatrix = r(featuresColumnIndex).asInstanceOf[SparseTensor[Double]].asXGB

// `.head` keeps the whole output array for the single input row.
val boosterResult = booster.predict(singleRowDMatrix, false, 0).head

// The predicted class is the index of the largest booster output value.
val boosterProbability = Vectors.dense(boosterResult.map(_.toDouble)).toDense
val boosterPrediction = boosterProbability.argmax

assert(boosterPrediction == mleapResult.dataset.head.getDouble(mleapPredictionColIndex))

assert(
almostEqualSequences(
Seq(boosterProbability.values),
Seq(mleapResult.dataset.head.getTensor[Double](mleapProbabilityColIndex).toArray)
)
)

// NOTE(review): the call below uses the two-argument `equalityTestRowByRow` form
// and `deserializedTransformer`; it appears to be a removed line from the rendered diff.
equalityTestRowByRow(booster, deserializedTransformer)
// Second predict call passes `true` as the second argument; judging by the
// variable name this requests margin (raw) output — confirm against XGBoost4j.
val boosterResultWithMargin = booster.predict(singleRowDMatrix, true, 0).head
val boosterRawPrediction = Vectors.dense(boosterResultWithMargin.map(_.toDouble)).toDense

assert(almostEqualSequences(
Seq(boosterRawPrediction.values),
Seq(mleapResult.dataset.head.getTensor[Double](mleapRawPredictionColIndex).toArray)
))
}
}

it("has the correct inputs and outputs with columns: prediction, probability and raw_prediction") {
// Parity test for the binary classifier: a booster trained on `denseDataset` and
// the transformer from `createBoosterClassifier` are compared row by row over the
// LibSVM test leap frame.
// NOTE(review): rendered diff, so `booster` is declared twice below; the
// `trainBooster(xgboostParams, ...)` / `createBoosterClassifier(booster)` pair
// appears to be the pre-change version.
it("Results between the XGBoost4j booster and the MLeap Transformer are the same") {
// NOTE(review): the parameterless `createBoosterClassifier` trains its own booster,
// so this `booster` and the one inside the transformer are trained separately;
// parity then relies on training being deterministic — confirm.
val booster = trainBooster(denseDataset)
val xgboostTransformer = createBoosterClassifier

val booster = trainBooster(xgboostParams, denseDataset)
val transformer = createBoosterClassifier(booster)
equalityTestRowByRow(booster, xgboostTransformer, leapFrameLibSVMtest)
}

it("has the correct inputs and outputs with columns: prediction, probability and raw_prediction") {
val transformer = createBoosterClassifier
val numFeatures = transformer.asInstanceOf[XGBoostClassification].model.numFeatures

assert(transformer.schema.fields ==
Seq(StructField("features", TensorType(BasicType.Double, Seq(numFeatures))),
Expand All @@ -99,29 +150,32 @@ class XGBoostClassificationModelParitySpec extends FunSpec
}

// Serialization round-trip test for the binary classifier: the transformer is
// serialized to an MLeap bundle, loaded back, and both the original and the
// deserialized transformer are run on the same leap frame; their output datasets
// must be equal.
// NOTE(review): rendered diff, so `xgboostTransformer`, `preSerializationResult` and
// `deserializedModelResult` each appear twice. The variants using
// `leapFrameLibSVMtrain` appear to be the pre-change ones, the variants using
// `leapFrameLibSVMtest` the post-change ones.
it("Results are the same pre and post serialization") {
val booster = trainBooster(xgboostParams, denseDataset)
val xgboostTransformer = createBoosterClassifier(booster)

val preSerializationResult = xgboostTransformer.transform(leapFrameLibSVMtrain)
val xgboostTransformer = createBoosterClassifier

val mleapBundle = serializeModelToMleapBundle(xgboostTransformer)

val deserializedTransformer: Transformer = loadMleapTransformerFromBundle(mleapBundle)
val deserializedModelResult = deserializedTransformer.transform(leapFrameLibSVMtrain).get

val preSerializationResult = xgboostTransformer.transform(leapFrameLibSVMtest)
val deserializedModelResult = deserializedTransformer.transform(leapFrameLibSVMtest).get

// `preSerializationResult` is unwrapped with `.get` only here, at comparison time.
assert(preSerializationResult.get.dataset == deserializedModelResult.dataset)
}

it("Test XGBoostMultinomialClassificationModel results are the same pre and post serialization") {
val booster = trainBooster(xgboostParams, denseDataset)
val xgboostTransformer = createMultinomialBoosterClassifier(booster)
// Parity test for the multinomial classifier: a multinomial booster trained on
// `denseMultinomialDataset` and the transformer from
// `createMultinomialBoosterClassifier` are compared row by row over the iris
// training leap frame.
it("Results between the XGBoost4j multinomial booster and the MLeap XGBoostMultinomialClassificationModel are the same") {
// NOTE(review): `createMultinomialBoosterClassifier` (parameterless form) trains its
// own booster, so `multiBooster` and the transformer's booster are trained
// separately; parity relies on training being deterministic — confirm.
val multiBooster = trainMultinomialBooster(denseMultinomialDataset)
val xgboostTransformer = createMultinomialBoosterClassifier

// NOTE(review): `preSerializationResult` is not used in this test; the line appears
// to be a removed line left in by the rendered diff.
val preSerializationResult = xgboostTransformer.transform(leapFrameLibSVMtrain)
equalityTestRowByRowMultinomial(multiBooster, xgboostTransformer, leapFrameIrisTrain)
}

val mleapBundle = serializeModelToMleapBundle(xgboostTransformer)
// Serialization round-trip test for the multinomial classifier: the transformer is
// serialized to an MLeap bundle, loaded back, and the original and deserialized
// transformers are run on the same leap frame; their output datasets must be equal.
// NOTE(review): rendered diff, so `deserializedModelResult` appears twice. The
// variant using `leapFrameLibSVMtrain` appears to be the pre-change one, the
// variant using `leapFrameIrisTrain` the post-change one.
it("XGBoostMultinomialClassificationModel results are the same pre and post serialization") {
val xgboostTransformer = createMultinomialBoosterClassifier

val mleapBundle = serializeModelToMleapBundle(xgboostTransformer)
val deserializedTransformer: Transformer = loadMleapTransformerFromBundle(mleapBundle)
val deserializedModelResult = deserializedTransformer.transform(leapFrameLibSVMtrain).get

val preSerializationResult = xgboostTransformer.transform(leapFrameIrisTrain)
val deserializedModelResult = deserializedTransformer.transform(leapFrameIrisTrain).get

// `preSerializationResult` is unwrapped with `.get` only here, at comparison time.
assert(preSerializationResult.get.dataset == deserializedModelResult.dataset)
}
Expand Down
Loading

0 comments on commit e7877bd

Please sign in to comment.