apache-spark - Spark 类 org/apache/spark/sql/types/DataType 的 Snappy 数据缺失依赖项
问题描述
我从 github snappy poc 下载了 snappy data poc
我正在尝试构建该项目,但 Spark 版本似乎存在依赖问题。当我使用构建文件运行 gradlew generateAdImpressions 时
出现错误
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/sql/types/DataType
at io.snappydata.adanalytics.KafkaAdImpressionProducer$.<init>(KafkaAdImpressionProducer.scala:37)
at io.snappydata.adanalytics.KafkaAdImpressionProducer$.<clinit>(KafkaAdImpressionProducer.scala)
at io.snappydata.adanalytics.KafkaAdImpressionProducer.main(KafkaAdImpressionProducer.scala)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.types.DataType
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 3 more
:generateAdImpressions FAILED
我认为这是因为该项目使用的是 spark_2.11:2.1.1,而该类在这一版本中似乎已不可用(据我所知它在 2.1.0 中还存在)。
这是gradle文件
// Build plugins: Java compilation, Shadow (fat/uber jar assembly), and Avro
// code generation. The plugins {} block must be the first statement in a
// Gradle build script and only accepts literal ids/versions.
plugins {
id 'java'
id 'com.github.johnrengelman.shadow' version '1.2.3'
id 'com.commercehub.gradle.plugin.avro' version "0.5.0"
}
// Base name used for the produced artifact(s), e.g. snappy-poc-1.0.0.jar.
archivesBaseName = 'snappy-poc'
// Configuration shared by the root project and all subprojects:
// repositories, common plugins, version properties, and the Scala toolchain.
allprojects {
    version = '1.0.0'

    repositories {
        mavenCentral()
        maven { url "https://oss.sonatype.org/content/groups/public" }
        maven { url "https://oss.sonatype.org/content/repositories/snapshots" }
        // NOTE(review): the two snappydata repos use plain http://; Gradle 7+
        // rejects insecure protocols unless `allowInsecureProtocol = true` is
        // set. Left as-is because the host may not serve https — verify.
        maven { url "http://repository.snappydata.io/repository/internal" }
        maven { url "http://repository.snappydata.io/repository/snapshots" }
        // NOTE(review): mvnrepository.com is a search front-end, not an actual
        // Maven repository; this entry almost certainly never resolves
        // anything. Kept to avoid changing resolution order, but it is a
        // candidate for removal.
        maven { url "http://mvnrepository.com/artifact" }
        maven { url 'https://clojars.org/repo' }
    }

    apply plugin: 'java'
    apply plugin: 'maven'
    apply plugin: 'scala'
    apply plugin: 'idea'
    apply plugin: 'eclipse'
    apply plugin: "com.commercehub.gradle.plugin.avro"

    ext {
        // FIX: was '2.1.1', which has no matching snappy-spark artifacts and
        // caused NoClassDefFoundError: org/apache/spark/sql/types/DataType at
        // runtime. Per the accepted solution, 2.1.1.1 is the published
        // SnappyData Spark fork version that works with snappydata 1.0.0.
        sparkVersion = '2.1.1.1'
        snappyVersion = '1.0.0'
    }

    configurations.all {
        // Re-check changing (SNAPSHOT) modules at most every 4 hours.
        resolutionStrategy.cacheChangingModulesFor 4, 'hours'
    }

    dependencies {
        // Scala toolchain pinned to the 2.11 binary series used by the
        // *_2.11 artifacts below.
        compile 'org.scala-lang:scala-library:2.11.6'
        compile 'org.scala-lang:scala-reflect:2.11.6'
        compile 'org.scala-lang:scala-compiler:2.11.6'
    }
}
// Root-project dependencies. Snappy/Spark artifacts are compileOnly: they are
// expected to be provided at runtime by the SnappyData cluster / assembly jar.
// NOTE(review): compileOnly dependencies are NOT on runtimeClasspath, so any
// JavaExec task below that runs against sourceSets.test.runtimeClasspath will
// miss the Spark classes unless the assembly jar supplies them — this is a
// plausible contributor to the reported NoClassDefFoundError; verify.
dependencies {
compileOnly "io.snappydata:snappydata-core_2.11:${snappyVersion}"
compileOnly "io.snappydata:snappydata-cluster_2.11:${snappyVersion}"
compileOnly "io.snappydata:snappy-spark-core_2.11:${sparkVersion}"
compileOnly "io.snappydata:snappy-spark-catalyst_2.11:${sparkVersion}"
compileOnly "io.snappydata:snappy-spark-sql_2.11:${sparkVersion}"
// compileOnly "io.snappydata:snappydata-aqp_2.11:${snappyVersion}"
// NOTE(review): kafka-avro-codec and algebird-core use the _2.10 Scala
// binary suffix while everything else is _2.11. Mixing Scala binary
// versions on one classpath is a classic source of linkage errors —
// confirm whether _2.11 builds of these artifacts exist.
compile 'com.miguno:kafka-avro-codec_2.10:0.1.1-SNAPSHOT'
compile 'org.apache.kafka:kafka_2.11:0.8.2.1'
compile 'com.twitter:algebird-core_2.10:0.1.11'
compile 'com.googlecode.javaewah:JavaEWAH:1.1.5'
compile 'org.joda:joda-convert:1.2'
compile 'com.opencsv:opencsv:3.3'
}
// Generate Java sources from the Avro schemas under src/avro.
// NOTE(review): output goes into src/main/java (checked-in source tree) rather
// than a build/ directory, so generated files can pollute the working copy.
task generateAvro(type: com.commercehub.gradle.plugin.avro.GenerateAvroJavaTask) {
source("src/avro")
outputDir = file("src/main/java")
}
// Wire the generated sources into Java compilation so compileJava depends on
// (and picks up) generateAvro's outputs.
compileJava.source(generateAvro.outputs)
// Generate Avro string fields as CharSequence (instead of avro Utf8/String).
avro.stringType = "charSequence"
// Outputs of the :assembly subproject's shadowJar (the fat jar), exposed as an
// extra project property so the JavaExec/copy tasks below can reference it.
// FIX: the original also declared a script-local `def assemblyJar` with the
// identical value immediately after this block, shadowing the ext property;
// the duplicate declaration was removed. `':assembly:shadowJar'` is an
// absolute task path, so resolution is unchanged.
ext {
    assemblyJar = rootProject.tasks.getByPath(':assembly:shadowJar').outputs
}
// Run the Kafka ad-impression producer (the task the reporter invoked).
// NOTE(review): classpath is sourceSets.test.runtimeClasspath; the Spark jars
// are compileOnly and therefore absent from it — presumably the assembly jar
// passed via PROJECT_ASSEMBLY_JAR is expected to cover them. Verify.
task generateAdImpressions(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.adanalytics.KafkaAdImpressionProducer'
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}
// Run the Snappy API-based log aggregator.
// NOTE(review): "aggeregate" is a typo for "aggregate", but the task name is
// the public CLI (`gradlew aggeregateAdImpressions_API`), so it is kept.
task aggeregateAdImpressions_API(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.adanalytics.SnappyAPILogAggregator'
// MaxPermSize is a pre-Java-8 JVM flag; ignored (with a warning) on 8+.
jvmArgs = ['-XX:MaxPermSize=512m']
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}
// Run the Snappy SQL-based log aggregator (same typo'd public task-name
// prefix as aggeregateAdImpressions_API; kept for CLI compatibility).
task aggeregateAdImpressions_SQL(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.adanalytics.SnappySQLLogAggregator'
jvmArgs = ['-XX:MaxPermSize=512m']
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}
// Run the socket-based ad-impression generator benchmark.
task generateAdImpressions_Socket(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.benchmark.SocketAdImpressionGenerator'
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
// NOTE(review): "8196m" is likely a typo for 8192m (8 GiB) — confirm.
maxHeapSize = "8196m"
}
// Ingestion-performance benchmark: socket source.
task startSnappyIngestionPerf_Socket(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.benchmark.SocketSnappyIngestionPerf'
jvmArgs = ['-XX:MaxPermSize=512m']
// NOTE(review): "8196m" is likely a typo for 8192m — confirm.
maxHeapSize = "8196m"
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}
// Ingestion-performance benchmark: custom Spark Streaming receiver source.
task startSnappyIngestionPerf_CustomReceiver(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.benchmark.CustomReceiverSnappyIngestionPerf'
jvmArgs = ['-XX:MaxPermSize=512m']
maxHeapSize = "8196m"
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}
// Ingestion-performance benchmark: CSV file source.
task startSnappyIngestionPerf_CSV(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.benchmark.CSVSnappyIngestionPerf'
jvmArgs = ['-XX:MaxPermSize=512m']
maxHeapSize = "8196m"
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}
// Ingestion-performance benchmark: Kafka source.
task startSnappyIngestionPerf_Kafka(type: JavaExec, dependsOn: classes) {
main = 'io.snappydata.benchmark.KafkaSnappyIngestionPerf'
jvmArgs = ['-XX:MaxPermSize=512m']
maxHeapSize = "8196m"
classpath sourceSets.test.runtimeClasspath
environment 'PROJECT_ASSEMBLY_JAR', assemblyJar.files.asPath
}
// Assemble a distributable product directory: invokes the sibling snappydata
// checkout's own `gradlew copyProduct`, then drops this project's fat jar
// into its lib/ dir. Execution order is load-bearing: the Exec command runs
// between doFirst (clean + mkdir) and doLast (copy).
task product(type: Exec) {
dependsOn ':assembly:shadowJar'
def productDir = "${rootProject.buildDir}/snappydata-poc"
// Location of the snappydata checkout: $SNAPPYDATA env var, falling back to
// a sibling directory next to this project.
def snappyData = System.env.SNAPPYDATA
if (snappyData == null || snappyData.length() == 0) {
snappyData = "${projectDir}/../snappydata"
}
doFirst {
// Start from a clean product dir.
delete productDir
file("${productDir}/lib").mkdirs()
}
// first execute the snappydata "product" target based on env var SNAPPYDATA
workingDir snappyData
commandLine './gradlew', 'copyProduct', "-PcopyToDir=${productDir}"
// lastly copy own assembly fat jar in product lib dir
doLast {
copy {
from assemblyJar
into "${productDir}/lib"
}
}
}
Spark 2.1.1 和 Spark 2.1.0 之间有许多相互冲突的依赖和缺失的类,逐一解决实在太难了。我查了一下,DataType 类在 2.1.0 中似乎是可用的。
我是 gradle 的新手,有人可以帮我如何选择性地添加这个包含旧文件的 jar,并且仍然为其他人保留 2.1.1。
提前非常感谢。
解决方案
snappy-poc 使用 Spark 2.1.0,效果很好。所以你想把它升级到 2.1.1?我用 2.1.1.1 快速测试了它,它对我来说很好用。也许您应该尝试在 build.gradle 文件中将 spark 版本更改为 2.1.1.1
推荐阅读
- visual-studio - C++/WinRT natvis 在全新安装中不起作用
- python - 如何检查列表中是否有元素而不会出现python中的索引错误
- animation - 动画 SVG 属性填充以在动画后进行渐变填充
- image - 在磁盘上保存代表图像的几个 Numpy 数组的列表
- javascript - 在 ajax 脚本之后运行 event.preventDefault() 时,有什么方法可以按名称获取多个 POST 请求?
- relational-database - 这个简单的基于 SQL 的 ETL 解决方案有什么问题?
- python - OpenCV - 斑点/缺陷/异常检测
- python - 如果两个字符串的含义相同,如何获得概率
- c++ - 在 enum_class 中覆盖 operator++ 是否正确?
- excel - 在服务器端数据表 laravel 中导出文件时更改方程式的问题