首页 > 解决方案 > Apache Spark(Scala)代码中的索引器错误?

问题描述

下面这段代码是从 Apache Spark 官方文档中原样复制粘贴的,但运行时却报错(见截图):

import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}

// Toy dataset of (id, category) pairs; "category" is the string column to be encoded.
val df = spark
  .createDataFrame(Seq(0 -> "a", 1 -> "b", 2 -> "c", 3 -> "a", 4 -> "a", 5 -> "c"))
  .toDF("id", "category")

// Fit a StringIndexer on df, then use the fitted model to add the
// numeric "categoryIndex" column derived from "category".
val indexer = new StringIndexer().setInputCol("category").setOutputCol("categoryIndex").fit(df)
val indexed = indexer.transform(df)

// Configure the one-hot encoder: reads "categoryIndex", writes the vector column "categoryVec".
val encoder = new OneHotEncoder().setInputCol("categoryIndex").setOutputCol("categoryVec")

// Apply the encoder and print the resulting DataFrame to the console.
val encoded = encoder.transform(indexed)
encoded.show()

在此处输入图像描述

标签: scala apache-spark apache-spark-sql one-hot-encoding

解决方案


Apache Spark 中没有 display 方法。您可以使用 show:

  scala> import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
  import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}

  scala> val df = spark.createDataFrame(Seq((0, "a"),(1, "b"),(2, "c"),(3, "a"),(4, "a"),(5, "c") )).toDF("id", "category")
  df: org.apache.spark.sql.DataFrame = [id: int, category: string]

  scala> val indexer = new StringIndexer().setInputCol("category").setOutputCol("categoryIndex").fit(df)
  indexer: org.apache.spark.ml.feature.StringIndexerModel = strIdx_b1524bc6ee99

  scala> val indexed = indexer.transform(df)
  indexed: org.apache.spark.sql.DataFrame = [id: int, category: string ... 1 more field]

  scala> val encoder = new OneHotEncoder().setInputCol("categoryIndex").setOutputCol("categoryVec")
  encoder: org.apache.spark.ml.feature.OneHotEncoder = oneHot_2c4e3e2a369b

  scala> val encoded = encoder.transform(indexed)
  encoded: org.apache.spark.sql.DataFrame = [id: int, category: string ... 2 more fields]

  scala> encoded.show()
  +---+--------+-------------+-------------+
  | id|category|categoryIndex|  categoryVec|
  +---+--------+-------------+-------------+
  |  0|       a|          0.0|(2,[0],[1.0])|
  |  1|       b|          2.0|    (2,[],[])|
  |  2|       c|          1.0|(2,[1],[1.0])|
  |  3|       a|          0.0|(2,[0],[1.0])|
  |  4|       a|          0.0|(2,[0],[1.0])|
  |  5|       c|          1.0|(2,[1],[1.0])|
  +---+--------+-------------+-------------+ 

推荐阅读