Error in SparkSession because SparkSession cannot be resolved

Problem Description

I am new to Spark-related work. I have tried the code below.

package hdd.models;

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.SparkSession;

/*
 * Analysis of the data using Spark SQL
 * 
 */
public class HrtDisDataAnalyze {

    public HrtDisDataAnalyze() {

    }

    public static void main(String[] args) {
        // SparkConfAndCtxBuilder is a helper class from the asker's own project
        SparkConfAndCtxBuilder ctxBuilder = new SparkConfAndCtxBuilder();
        JavaSparkContext jctx = ctxBuilder.loadSimpleSparkContext("Heart Disease Data Analysis App", "local");
        JavaRDD<String> rows = jctx.textFile("file:///C:/Users/harpr/workspace/HrtDisDetection/src/resources/full_data_cleaned.csv");

        // Space-separated column names for the heart-disease CSV
        String schemaString = "age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal num";
        List<StructField> fields = new ArrayList<>();
        for (String fieldName : schemaString.split(" ")) {
            fields.add(DataTypes.createStructField(fieldName, DataTypes.StringType, true));
        }

        StructType schema = DataTypes.createStructType(fields);
        // Split each CSV line and wrap its 14 columns in a Row
        JavaRDD<Row> rowRdd = rows.map(new Function<String, Row>() {

            @Override
            public Row call(String record) throws Exception {
                String[] fields = record.split(",");
                return RowFactory.create(fields[0], fields[1], fields[2], fields[3], fields[4],
                        fields[5], fields[6], fields[7], fields[8], fields[9],
                        fields[10], fields[11], fields[12], fields[13]);
            }
        });
        // Session builder with Kryo serialization and a GeoSpark registrator configured
        SparkSession sparkSession = SparkSession.builder()
                .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .config("spark.kryo.registrator", "org.datasyslab.geospark.serde.GeoSparkKryoRegistrator")
                .master("local[*]")
                .appName("testGeoSpark")
                .getOrCreate();
        Dataset<Row> df = sparkSession.read().csv("usr/local/eclipse1/eclipse/hrtdisdetection/src/resources/cleveland_data_raw.csv");
        df.createOrReplaceTempView("heartDisData");
    }
}

I get the following error on the SparkSession line: "The type org.apache.spark.sql.SparkSession$Builder cannot be resolved. It is indirectly referenced from required .class files". Note: I am using spark-2.1.0 and Scala 2.10. I tried the above code in Java with Eclipse Neon.

Tags: java, apache-spark

Solution


I added the jar files for Spark SQL, and the error cleared: https://jar-download.com/?search_box=org.apache.spark%20spark.sql
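For a Maven build, the equivalent fix would be declaring the spark-sql artifact that matches the Scala version (for Spark 2.1.0 with Scala 2.10, that is org.apache.spark:spark-sql_2.10:2.1.0). Once the jar resolves, a minimal sketch like the following should compile and run; the class name and file path here are placeholders, not the asker's actual project:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SparkSessionCheck {

    public static void main(String[] args) {
        // The builder only resolves once spark-sql is on the classpath
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("SparkSessionCheck")
                .getOrCreate();

        // Read the CSV through the session; no separate SQLContext is needed
        Dataset<Row> df = spark.read().csv("file:///path/to/cleveland_data_raw.csv");
        df.createOrReplaceTempView("heartDisData");
        spark.sql("SELECT COUNT(*) FROM heartDisData").show();

        spark.stop();
    }
}

SparkSession, added in Spark 2.0, subsumes SQLContext, which is why reading through the session suffices here.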

