首页 > 解决方案 > Elasticsearch Translog 已关闭错误

问题描述

我有一个 Elasticsearch 集群,由 8 个热节点、4 个暖节点、3 个主节点和 2 个协调节点组成。我不断在随机节点上收到以下错误,随后该节点会与集群断开连接。

[2018-09-11T07:38:48,692][ERROR][o.e.b.ElasticsearchUncaughtExceptionHandler] [elk-es-data-hot-1.osdc2.company.local] fatal error in thread [elasticsearch[elk-es-data-hot-1.osdc2.company.local][bulk][T#4]], exiting
java.lang.AssertionError: Unexpected AlreadyClosedException
        at org.elasticsearch.index.engine.InternalEngine.failOnTragicEvent(InternalEngine.java:1786) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.engine.InternalEngine.maybeFailEngine(InternalEngine.java:1803) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:905) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:738) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.shard.IndexShard.applyIndexOperation(IndexShard.java:707) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.shard.IndexShard.applyIndexOperationOnReplica(IndexShard.java:680) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.bulk.TransportShardBulkAction.performOpOnReplica(TransportShardBulkAction.java:518) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.bulk.TransportShardBulkAction.performOnReplica(TransportShardBulkAction.java:480) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnReplica(TransportShardBulkAction.java:466) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnReplica(TransportShardBulkAction.java:72) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncReplicaAction.onResponse(TransportReplicationAction.java:567) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncReplicaAction.onResponse(TransportReplicationAction.java:530) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.shard.IndexShard$2.onResponse(IndexShard.java:2315) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.shard.IndexShard$2.onResponse(IndexShard.java:2293) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.shard.IndexShardOperationPermits.acquire(IndexShardOperationPermits.java:238) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.shard.IndexShard.acquireReplicaOperationPermit(IndexShard.java:2292) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncReplicaAction.doRun(TransportReplicationAction.java:641) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.support.replication.TransportReplicationAction$ReplicaOperationTransportHandler.messageReceived(TransportReplicationAction.java:513) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.action.support.replication.TransportReplicationAction$ReplicaOperationTransportHandler.messageReceived(TransportReplicationAction.java:493) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:66) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.transport.TcpTransport$RequestHandler.doRun(TcpTransport.java:1555) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:672) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-6.2.3.jar:6.2.3]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_144]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_144]
        at java.lang.Thread.run(Thread.java:748) [?:1.8.0_144]
Caused by: org.apache.lucene.store.AlreadyClosedException: translog is already closed
        at org.elasticsearch.index.translog.Translog.ensureOpen(Translog.java:1667) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.translog.Translog.add(Translog.java:508) ~[elasticsearch-6.2.3.jar:6.2.3]
        at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:887) ~[elasticsearch-6.2.3.jar:6.2.3]
        ... 24 more

我为所有创建的索引使用默认模板:

{
  "default": {
    "order": -1,
    "index_patterns": [
      "*"
    ],
    "settings": {
      "index": {
        "codec": "best_compression",
        "routing": {
          "allocation": {
            "include": {
              "type": "hot"
            }
          }
        },
        "mapping": {
          "total_fields": {
            "limit": "4000"
          }
        },
        "refresh_interval": "30s",
        "number_of_shards": "4",
        "translog": {
          "durability": "async"
        },
        "unassigned": {
          "node_left": {
            "delayed_timeout": "20m"
          }
        },
        "number_of_replicas": "1"
      }
    },
    "mappings": {},
    "aliases": {}
  }
}

我不明白是什么导致了这个问题,以及该如何解决。谁能解释一下我的节点上发生了什么,以及为什么会这样?我使用的是 Elasticsearch v6.2.3,我的集群至今仍会遇到这个问题。

标签: java elasticsearch

解决方案


推荐阅读