首页 > 解决方案 > Convert a list to Dataframe (Spark Scala)

问题描述

I have a list of objects which I want to convert into a DataFrame:

 /**
  * Attempts to turn a buffer of `TestTO` objects into a DataFrame: allocates a
  * six-slot array of `StructField`s, wraps it in a `StructType`, parallelizes
  * `list` into an RDD, maps every element to a `Row`, creates the DataFrame
  * and prints it with `show(false)`.
  *
  * NOTE(review): this is the failing version from the question; the inline
  * notes below mark the spots that look wrong.
  *
  * @param list objects to convert
  */
 def createDf(list: ListBuffer[TestTO]): DataFrame = {
    val structArrLength = 6
    val cols: Array[StructField] = new Array[StructField](structArrLength)
    cols(0) = StructField("a", StringType, nullable=true)
    // NOTE(review): index 1 is never assigned (the assignments jump from 0 to 2),
    // so the six-slot array keeps a null entry and only five fields are defined.
    cols(2) = StructField("b", StringType, nullable=true)
    cols(3) = StructField("c", StringType, nullable=true)
    cols(4) = StructField("d", DateType, nullable=true)
    cols(5) = StructField("e", BooleanType, nullable=true)

    val schema = StructType(cols)
    val rddMap=sparkSession.sparkContext.parallelize(list)
    // NOTE(review): Row(Seq(x)) builds a Row with a single field whose value is a
    // Seq containing the whole object, rather than one value per schema field.
    // That is consistent with the reported "$colon$colon is not a valid external
    // type for schema of string" error. Presumably each property of x should be
    // passed to Row individually - confirm against TestTO's fields.
    val rows = rddMap.map {  x => Row(Seq(x))}
    val df = sparkSession.createDataFrame(rows, schema)
    // NOTE(review): the last expression is df.show(false), not df, although the
    // declared result type is DataFrame - presumably df should be returned.
    df.show(false)
}

I am getting the following error: Caused by: java.lang.RuntimeException: scala.collection.immutable.$colon$colon is not a valid external type for schema of string

I am new to the Scala and Spark world. If someone could help me convert a list to a DataFrame in Spark/Scala, it would be a great help.

thank you

Solution :-

  /**
   * Converts a buffer of `MaskScheduleTO` objects into a DataFrame that uses
   * the schema returned by `createMaskSchema`.
   *
   * Every element contributes one `Row` holding its `prop1` and `prop2`
   * values, in that order.
   *
   * @param list objects to convert
   * @return a `Dataset[Row]` with one row per element of `list`
   */
  def createDf(list: ListBuffer[MaskScheduleTO]): Dataset[Row] = {
    // Build the rows with a pure transformation instead of appending to a
    // mutable buffer held in a `var`.
    val rows: List[Row] = list.map(x => Row(x.prop1, x.prop2)).toList
    val rowRdd = sparkSession.sparkContext.parallelize(rows)
    sparkSession.createDataFrame(rowRdd, createMaskSchema)
  }
  
   /**
    * Schema consumed by `createDf`: two nullable string columns, `prop1`
    * followed by `prop2`.
    */
   private def createMaskSchema = {
     val columnNames = Seq("prop1", "prop2")
     StructType(columnNames.map(name => StructField(name, StringType, nullable = true)))
   }

标签: scala, dataframe, apache-spark

解决方案


解决方案 :-

  /**
   * Converts a buffer of `MaskScheduleTO` objects into a DataFrame that uses
   * the schema returned by `createMaskSchema`.
   *
   * Every element contributes one `Row` holding its `prop1` and `prop2`
   * values, in that order.
   *
   * @param list objects to convert
   * @return a `Dataset[Row]` with one row per element of `list`
   */
  def createDf(list: ListBuffer[MaskScheduleTO]): Dataset[Row] = {
    // Build the rows with a pure transformation instead of appending to a
    // mutable buffer held in a `var`.
    val rows: List[Row] = list.map(x => Row(x.prop1, x.prop2)).toList
    val rowRdd = sparkSession.sparkContext.parallelize(rows)
    sparkSession.createDataFrame(rowRdd, createMaskSchema)
  }
  
   /**
    * Schema consumed by `createDf`: two nullable string columns, `prop1`
    * followed by `prop2`.
    */
   private def createMaskSchema = {
     val columnNames = Seq("prop1", "prop2")
     StructType(columnNames.map(name => StructField(name, StringType, nullable = true)))
   }

推荐阅读