python - 无法在 Jupyter Notebook 中删除或获取或创建 sparkContext
问题描述
这是我第一次尝试使用 spark 进行机器学习,我打算在 Jupyter Notebook 中使用它。
似乎我遇到了以错误方式实例化 Spark 的问题。显然我只能实例化一个 SparkContext。
我努力了
import findspark
findspark.init()
from pyspark import SparkContext
sc = SparkContext.getOrCreate()
和
import findspark
findspark.init()
from pyspark import SparkContext
sc = sc.stop()
这是发布的两个常见解决方案。
如果我不实例化 sc,那么 sc 就会是一个未定义的对象。而如果我通过 .sparkContext() 来实例化它,它又会告诉我不能同时存在多个 Spark 上下文——我不确定现有的 Spark 上下文究竟在笔记本的哪个地方,也不知道在无法引用这个对象的情况下该如何解决这个问题。
我还关闭了我的所有终端(除了保持 Jupyter Notebook 运行的那个),因为我之前曾在终端里输入过 $pyspark 来检查它是否安装正确。
这是我的错误信息:
Py4JJavaError Traceback (most recent call last)
<ipython-input-31-eca205ca2337> in <module>
2 findspark.init()
3 from pyspark import SparkContext
----> 4 sc = SparkContext.getOrCreate()
/usr/local/opt/apache-spark/libexec/python/pyspark/context.py in getOrCreate(cls, conf)
365 with SparkContext._lock:
366 if SparkContext._active_spark_context is None:
--> 367 SparkContext(conf=conf or SparkConf())
368 return SparkContext._active_spark_context
369
/usr/local/opt/apache-spark/libexec/python/pyspark/context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
134 try:
135 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
--> 136 conf, jsc, profiler_cls)
137 except:
138 # If an error occurs, clean up in order to allow future SparkContext creation:
/usr/local/opt/apache-spark/libexec/python/pyspark/context.py in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
196
197 # Create the Java SparkContext through Py4J
--> 198 self._jsc = jsc or self._initialize_context(self._conf._jconf)
199 # Reset the SparkConf to the one actually used by the SparkContext in JVM.
200 self._conf = SparkConf(_jconf=self._jsc.sc().conf())
/usr/local/opt/apache-spark/libexec/python/pyspark/context.py in _initialize_context(self, jconf)
304 Initialize SparkContext in function to allow subclass specific initialization
305 """
--> 306 return self._jvm.JavaSparkContext(jconf)
307
308 @classmethod
/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1523 answer = self._gateway_client.send_command(command)
1524 return_value = get_return_value(
-> 1525 answer, self._gateway_client, None, self._fqn)
1526
1527 for temp_arg in temp_args:
/usr/local/opt/apache-spark/libexec/python/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: org.apache.spark.SparkException: Only one SparkContext may be running in this JVM (see SPARK-2243). To ignore this error, set spark.driver.allowMultipleContexts = true. The currently running SparkContext was created at:
org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.lang.reflect.Constructor.newInstance(Constructor.java:423)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
py4j.Gateway.invoke(Gateway.java:238)
py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
py4j.GatewayConnection.run(GatewayConnection.java:238)
java.lang.Thread.run(Thread.java:748)
at org.apache.spark.SparkContext$$anonfun$assertNoOtherContextIsRunning$2.apply(SparkContext.scala:2483)
at org.apache.spark.SparkContext$$anonfun$assertNoOtherContextIsRunning$2.apply(SparkContext.scala:2479)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.SparkContext$.assertNoOtherContextIsRunning(SparkContext.scala:2479)
at org.apache.spark.SparkContext$.markPartiallyConstructed(SparkContext.scala:2568)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:85)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:238)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
谁能建议我如何解决这个问题,以便我可以实例化一个新的 sparkContext 并继续我的项目?非常感谢!!!
解决方案
推荐阅读
- c# - 无法从根提供程序解析“GraphQL.Resolvers.ICountriesResolver”,因为它需要范围服务“Query.Persistence.SampleDbContext”
- c# - Any() Linq 方法对同一个对象进行多次查询。C# EF 核心
- python - 有没有办法在 pool.apply_async 可用时累积调用的结果而不将它们收集在类似结构的列表中?
- node.js - 当 JSON 数据包含在带有 require() 的 Node.js 文件中时,它是被视为通用对象还是特殊情况?
- ios - CoreML / MLModelConfig 首选MetalDevice - 了解设备放置启发式
- notifications - 来自 PBI 报表服务器的刷新通知
- java - 使用存储过程从 sql server 快速读取百万条记录,并使用 java 和 spring boot 将其写入 csv
- ios - 如果可用,firebase ios 应用程序是否会自动监视新的构建/发布?
- c# - User.Claims 在 MVC 应用程序中为空
- javascript - 为什么 Firefox 不会让我禁用的选择选项变灰?