首页 > 解决方案 > Filebeat 7.10 无法从多个 kubernetes pod 收集事件

问题描述

Filebeat 配置为使用 or 条件从多个 Kubernetes pod 收集事件。来自特定 pod 的事件会持续收集,但来自另一个 pod 的事件收集速度非常慢,一段时间后不会收集任何事件。

如果在配置中注释掉其他所有 pod、只保留一个,则一切正常,Elasticsearch 中的事件也能很快更新。

集群有 3 个工作节点,Filebeat (v7.10.2) 以 DaemonSet 方式运行。每个 Filebeat 实例的 CPU 限制为 4 核,内存限制为 4 GB。索引每天生成一个,单个索引大小不超过 2 GB。

我希望 Filebeat 能从所有 pod 收集事件并及时更新到 Elasticsearch。请帮我分析问题所在,并给出提升 Filebeat 性能的最佳实践。

filebeat.yml -

  # Filebeat configuration: autodiscover selected Kubernetes pods on this node,
  # read their container logs and ship the events to Logstash.
  filebeat.autodiscover:
    providers:
      - type: kubernetes
        # NODE_NAME is expanded from the environment of the Filebeat process.
        node: ${NODE_NAME}
        tags:
          - "kube-logs"
        templates:
          # A single template applied to every pod whose name contains any of
          # the substrings below.
          - condition.or:
              - contains:
                  kubernetes.pod.name: "ne-db-manager"
              - contains:
                  kubernetes.pod.name: "ne-mgmt"
              - contains:
                  kubernetes.pod.name: "list-manager"
              - contains:
                  kubernetes.pod.name: "scheduler-mgmt"
              - contains:
                  kubernetes.pod.name: "sync-ne"
              - contains:
                  kubernetes.pod.name: "file-manager"
              - contains:
                  kubernetes.pod.name: "dash-board"
              - contains:
                  kubernetes.pod.name: "config-manager"
              - contains:
                  kubernetes.pod.name: "report-manager"
              - contains:
                  kubernetes.pod.name: "clean-backup"
              - contains:
                  kubernetes.pod.name: "warrior"
              - contains:
                  kubernetes.pod.name: "ne-backup"
              - contains:
                  kubernetes.pod.name: "ne-restore"
            config:
              - type: container
                paths:
                  # The container id placeholder is filled in by autodiscover
                  # for each matching container.
                  - "/var/log/containers/*-${data.kubernetes.container.id}.log"
                # Lines that start with whitespace are appended to the line
                # before them (negate: false, match: after).
                multiline.type: pattern
                multiline.pattern: '^[[:space:]]'
                multiline.negate: false
                multiline.match: after
  # Was "debug". Debug logging is very verbose and costs CPU and disk I/O on a
  # deployment that is already struggling to keep up; raise it again only
  # while troubleshooting.
  logging.level: info
  processors:
    # Discard events whose namespace is kube-system, default or logging.
    - drop_event:
        when.or:
          - equals:
              kubernetes.namespace: "kube-system"
          - equals:
              kubernetes.namespace: "default"
          - equals:
              kubernetes.namespace: "logging"
  output.logstash:
    hosts: ["logstash-service.logging:5044"]
    # Sent to Logstash as [@metadata][beat], which the Logstash output uses to
    # build the index name.
    index: "filebeat"
    # "pretty: true" was removed: it is a console-output setting and is not an
    # option of the Logstash output.
  # NOTE(review): Filebeat only loads index templates through the
  # Elasticsearch output; with output.logstash these two settings presumably
  # have no effect - confirm and load the template manually if needed.
  setup.template.name: "filebeat"
  setup.template.pattern: "filebeat-*"

logstash.conf -

 # Receive events from Filebeat over the Beats protocol on port 5044.
 input {
   beats { port => 5044 }
 }
 # Tag and parse application log lines received from Filebeat.
 #
 # Every Beats event starts with the DBBKUP tag. The first classifier whose
 # marker text is found in Message replaces DBBKUP with an application tag
 # (NE_MGMT, SCHED, DASHBOARD, ...). Within an application, selected message
 # keys get their own tag and are parsed into AppName / LogLevel / msgkey /
 # value, with the JSON in "value" expanded under "data".
 #
 # NOTE(review): the marker groks use grok's default tag_on_failure, so every
 # marker that is tested and not found adds _grokparsefailure to the event.
 # Confirm that the fallback block near the end is meant to key on that.
 filter {
   if "beats_input_codec_plain_applied" in [tags] {
     mutate {
       #rename => [ "log", "message" ]
       add_tag => [ "DBBKUP", "kubernetes" ]
     }
     mutate {
       remove_tag => ["beats_input_codec_plain_applied"]
     }
     date {
       match => ["time", "ISO8601"]
       remove_field => ["time"]
     }

     # Split the raw line into a timestamp and the remaining text. On success
     # the original "message" field is dropped, so later filters work on
     # "Message".
     grok {
       match => { "message" => "%{TIMESTAMP_ISO8601:LogTimeStamp}%{SPACE}%{GREEDYDATA:Message}" }
       remove_field => ["message"]
       add_tag => ["DBBKUP"]
     }
     date {
       match => [ "LogTimeStamp", "yyyy-MM-dd HH:mm:ss", "ISO8601" ]
       #match => [ "LogTimeStamp", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "ISO8601" ]
       target => "LogTimeStamp"
     }

     # ---- Severity tags (only while the event is still unclassified) ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "( DEBUG )" }
         add_tag => ["DEBUG"]
       }
     }
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "( INFO )" }
         add_tag => ["INFO"]
       }
     }
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "( ERROR )" }
         add_tag => ["ERROR"]
       }
     }
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(Exception)" }
         add_tag => ["EXCEPTION"]
       }
     }

     # ---- ne_management ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(ne_management-)" }
         add_tag => ["NE_MGMT"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "NE_MGMT" in [tags] {
       grok {
         match => { "Message" => "(NE_DATA_HISTORY)" }
         add_tag => ["NE_DATA_HISTORY"]
       }
     }
     if "NE_DATA_HISTORY" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }

     # ---- scheduler ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(scheduler-)" }
         add_tag => ["SCHED"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "SCHED" in [tags] {
       grok {
         match => { "Message" => "(SCHEDULE_EXECUTED)" }
         add_tag => ["SCHEDULE_EXECUTED"]
       }
     }
     if "SCHEDULE_EXECUTED" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }

     # ---- dash_board ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(dash_board-)" }
         add_tag => ["DASHBOARD"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "DASHBOARD" in [tags] {
       grok {
         match => { "Message" => "(NFS_MONITORING)" }
         add_tag => ["NFS_MONITORING"]
       }
     }
     if "NFS_MONITORING" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }
     if "DASHBOARD" in [tags] {
       grok {
         match => { "Message" => "(SYNCNE_CLEANBKUP_DATA)" }
         add_tag => ["SYNCNE_CLEANBKUP_DATA"]
       }
     }
     if "SYNCNE_CLEANBKUP_DATA" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }

     # ---- config_management ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(config_management-)" }
         add_tag => ["CONFIG_MGMT"]
         remove_tag => ["DBBKUP"]
       }
     }

     # ---- NEDBManagerApp ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(NEDBManagerApp-)" }
         add_tag => ["DO_BKUP"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "DO_BKUP" in [tags] {
       grok {
         match => { "Message" => "(BACKUP_EXECUTION_DETAILS)" }
         add_tag => ["BACKUP_EXECUTION_DETAILS"]
       }
     }
     if "BACKUP_EXECUTION_DETAILS" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }
     if "DO_BKUP" in [tags] {
       grok {
         match => { "Message" => "(NE_RESTORE_REPORT)" }
         add_tag => ["NE_RESTORE_REPORT"]
       }
     }
     if "NE_RESTORE_REPORT" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }
     if "DO_BKUP" in [tags] {
       grok {
         match => { "Message" => "(SCHEDULE_RUN_DETAILS)" }
         add_tag => ["SCHEDULE_RUN_DETAILS"]
       }
     }
     if "SCHEDULE_RUN_DETAILS" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }

     # ---- ReportApp ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(ReportApp-)" }
         add_tag => ["REPORT"]
         remove_tag => ["DBBKUP"]
       }
     }

     # ---- sync_ne ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(sync_ne-)" }
         add_tag => ["SYNC_NE"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "SYNC_NE" in [tags] {
       grok {
         match => { "Message" => "(NE_DATA_HISTORY)" }
         add_tag => ["NE_DATA_HISTORY"]
       }
     }
     # Restricted to SYNC_NE events: NE_MGMT events carrying NE_DATA_HISTORY
     # were already parsed in the ne_management section, and a second grok pass
     # without "overwrite" would turn AppName, LogLevel, msgkey and value into
     # two-element arrays.
     if "SYNC_NE" in [tags] and "NE_DATA_HISTORY" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }
     if "SYNC_NE" in [tags] {
       grok {
         match => { "Message" => "(SYNC_DATA_HISTORY)" }
         add_tag => ["SYNC_DATA_HISTORY"]
       }
     }
     if "SYNC_DATA_HISTORY" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }

     # ---- file_management ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(file_management-)" }
         add_tag => ["FILE_MGMT"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "FILE_MGMT" in [tags] {
       grok {
         match => { "Message" => "(BACKUP_DOWNLOADED_DETAILS)" }
         add_tag => ["BACKUP_DOWNLOADED_DETAILS"]
       }
     }
     if "BACKUP_DOWNLOADED_DETAILS" in [tags] {
       grok {
         match => { "Message" => "%{USERNAME:AppName}%{SPACE}%{WORD:LogLevel}%{SPACE}%{WORD:msgkey} : %{GREEDYDATA:value}" }
       }
       json {
         source => "value"
         target => "data"
       }
     }

     # ---- Applications that are tagged but not parsed further ----
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(cleanup_backups-)" }
         add_tag => ["CLEANUP"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(list-manager-)" }
         add_tag => ["LIST_MGR"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(vz1-warrior-job)" }
         add_tag => ["JOBS"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "DBBKUP" in [tags] {
       grok {
         match => { "Message" => "(Katana Log)" }
         add_tag => ["WARRIOR"]
         remove_tag => ["DBBKUP"]
       }
     }

     # ---- Fallback for events on which some grok above failed ----
     # NOTE(review): the capture is written back into "Message" without
     # "overwrite", so on a match Message becomes an array of the old and the
     # new text - confirm whether overwrite => ["Message"] was intended.
     if "_grokparsefailure" in [tags] {
       grok {
         match => { "Message" => "%{WORD:LogLevel}: %{GREEDYDATA:Message}" }
         remove_field => ["message"]
         add_tag => ["log"]
       }
     }

     # ---- Still-unclassified events from ne-backup / ne-restore pods ----
     # These match on the original "message" field, which has been removed
     # whenever the timestamp grok or the fallback grok above succeeded.
     if "DBBKUP" in [tags] and "ne-backup" in [kubernetes][pod][name] {
       grok {
         match => { "message" => "%{GREEDYDATA:bkupLog}" }
         remove_field => ["message"]
         add_tag => ["WARJOBS"]
         remove_tag => ["DBBKUP"]
       }
     }
     if "DBBKUP" in [tags] and "ne-restore" in [kubernetes][pod][name] {
       grok {
         match => { "message" => "%{GREEDYDATA:bkupLog}" }
         remove_field => ["message"]
         add_tag => ["WARJOBS"]
         remove_tag => ["DBBKUP"]
       }
     }
   }
 }
 # Index every event into Elasticsearch, one index per beat name, beat
 # version and day.
 output {
   elasticsearch {
     index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
     hosts => ["http://index.elastic:9200"]
     # Logstash does not install or update an index template itself.
     manage_template => false
   }
 }

我可以看到带 'WARJOBS' 标签的事件持续从 'ne-backup' pod 收集上来,但来自 'ne-db-manager' pod、带 'BACKUP_EXECUTION_DETAILS' 标签的事件只在开始时被收集,之后就不再收集了。

我在其他标签上也看到了这个问题。如果我只是从 filebeat 配置中注释掉其余的 pod,那么来自单个 pod 的日志会很快收集起来。

标签: elasticsearch, kubernetes, logstash, kibana, filebeat

解决方案


推荐阅读