Skip to content

Commit f89808b

Browse files
WeichenXu123 authored and Felix Cheung committed
[SPARK-17499][SPARKR][ML][MLLIB] make the default params in sparkR spark.mlp consistent with MultilayerPerceptronClassifier
## What changes were proposed in this pull request? Update the `MultilayerPerceptronClassifierWrapper.fit` parameter types: `layers: Array[Int]`, `seed: String`. Update several default params in SparkR `spark.mlp`: `tol` --> 1e-6, `stepSize` --> 0.03, `seed` --> NULL (when seed == NULL, the Scala-side wrapper regards it as a `null` value and the default seed is used). The R-side `seed` only supports 32-bit integers. Remove the `layers` default value, and move `layers` in front of the parameters that have default values. Add a `layers` parameter validation check. ## How was this patch tested? Tests added. Author: WeichenXu <[email protected]> Closes apache#15051 from WeichenXu123/update_py_mlp_default.
1 parent 90d5754 commit f89808b

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

R/pkg/R/mllib.R

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -694,12 +694,19 @@ setMethod("predict", signature(object = "KMeansModel"),
694694
#' }
695695
#' @note spark.mlp since 2.1.0
696696
setMethod("spark.mlp", signature(data = "SparkDataFrame"),
697-
function(data, blockSize = 128, layers = c(3, 5, 2), solver = "l-bfgs", maxIter = 100,
698-
tol = 0.5, stepSize = 1, seed = 1) {
697+
function(data, layers, blockSize = 128, solver = "l-bfgs", maxIter = 100,
698+
tol = 1E-6, stepSize = 0.03, seed = NULL) {
699+
layers <- as.integer(na.omit(layers))
700+
if (length(layers) <= 1) {
701+
stop ("layers must be a integer vector with length > 1.")
702+
}
703+
if (!is.null(seed)) {
704+
seed <- as.character(as.integer(seed))
705+
}
699706
jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper",
700707
"fit", data@sdf, as.integer(blockSize), as.array(layers),
701708
as.character(solver), as.integer(maxIter), as.numeric(tol),
702-
as.numeric(stepSize), as.integer(seed))
709+
as.numeric(stepSize), seed)
703710
new("MultilayerPerceptronClassificationModel", jobj = jobj)
704711
})
705712

R/pkg/inst/tests/testthat/test_mllib.R

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,25 @@ test_that("spark.mlp", {
391391

392392
unlink(modelPath)
393393

394+
# Test default parameter
395+
model <- spark.mlp(df, layers = c(4, 5, 4, 3))
396+
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
397+
expect_equal(head(mlpPredictions$prediction, 10), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 0))
398+
399+
# Test illegal parameter
400+
expect_error(spark.mlp(df, layers = NULL), "layers must be a integer vector with length > 1.")
401+
expect_error(spark.mlp(df, layers = c()), "layers must be a integer vector with length > 1.")
402+
expect_error(spark.mlp(df, layers = c(3)), "layers must be a integer vector with length > 1.")
403+
404+
# Test random seed
405+
# default seed
406+
model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10)
407+
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
408+
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 2, 0, 1))
409+
# seed equals 10
410+
model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10)
411+
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
412+
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1))
394413
})
395414

396415
test_that("spark.naiveBayes", {

mllib/src/main/scala/org/apache/spark/ml/r/MultilayerPerceptronClassifierWrapper.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,26 +53,26 @@ private[r] object MultilayerPerceptronClassifierWrapper
5353
def fit(
5454
data: DataFrame,
5555
blockSize: Int,
56-
layers: Array[Double],
56+
layers: Array[Int],
5757
solver: String,
5858
maxIter: Int,
5959
tol: Double,
6060
stepSize: Double,
61-
seed: Int
61+
seed: String
6262
): MultilayerPerceptronClassifierWrapper = {
6363
// get labels and feature names from output schema
6464
val schema = data.schema
6565

6666
// assemble and fit the pipeline
6767
val mlp = new MultilayerPerceptronClassifier()
68-
.setLayers(layers.map(_.toInt))
68+
.setLayers(layers)
6969
.setBlockSize(blockSize)
7070
.setSolver(solver)
7171
.setMaxIter(maxIter)
7272
.setTol(tol)
7373
.setStepSize(stepSize)
74-
.setSeed(seed)
7574
.setPredictionCol(PREDICTED_LABEL_COL)
75+
if (seed != null && seed.length > 0) mlp.setSeed(seed.toInt)
7676
val pipeline = new Pipeline()
7777
.setStages(Array(mlp))
7878
.fit(data)

0 commit comments

Comments
 (0)