v6 GA.2 版本开启 n9e 日志后,收集不到日志

Viewed 105

开启n9e日志功能后收集不到日志,配置如下

Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "file"
# # rotate by time
KeepHours = 4
# # rotate by size
RotateNum = 3
# # unit: MB
RotateSize = 256

logs目录是空的

[root@n9e n9e]# cd logs/
[root@n9e logs]# ls
[root@n9e logs]# pwd
/opt/n9e/logs
[root@n9e logs]#
4 Answers

1,配置贴全
2,直接在前台执行 ./n9e,看看终端打印什么

image.png

配置见下一条评论

[哭笑],好了。多谢

二进制安装的话,默认当前目录没有logs目录,要自己创建一下。我测试了是OK的。image.png
image.png

我也是二进制安装。我把 logs 目录删了,重启后还是不行,logs 目录也没有被自动创建。

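你配置里的 [Log] 段头被你删了。这样 Dir、Level、Output 等配置项就被归到了上面的 [Global] 段下,日志配置不会生效,所以日志仍然打印到终端而不是写入文件。把 [Log] 这一行加回到 Dir = "logs" 之前即可。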

[Global]
RunMode = "release"


Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "file"
# # rotate by time
KeepHours = 4
# # rotate by size
RotateNum = 3
# # unit: MB
RotateSize = 256

[HTTP]
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 17000
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = false
# whether enable pprof
PProf = false
# expose prometheus /metrics?
ExposeMetrics = true
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120

[HTTP.Pushgw]
Enable = true
# [HTTP.Pushgw.BasicAuth]
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"

再往下就和日志不相关了还需要吗?

./n9e 执行结果如下

[root@n9e n9e]# ./n9e
runner.cwd: /opt/n9e
runner.hostname: n9e.novalocal
runner.fd_limits: (soft=4096, hard=4096)
runner.vm_limits: (soft=unlimited, hard=unlimited)
2023-04-25 10:12:14.026305 INFO memsto/busi_group_cache.go:108 timer: sync busi groups done, cost: 0ms, number: 7
2023-04-25 10:12:14.028594 INFO memsto/target_cache.go:163 timer: sync targets done, cost: 2ms, number: 97
2023-04-25 10:12:14.029376 INFO memsto/datasource_cache.go:107 timer: sync datasources done, cost: 0ms, number: 1
2023-04-25 10:12:14.030791 INFO memsto/alert_mute_cache.go:142 timer: sync mutes done, cost: 1ms, number: 0
2023-04-25 10:12:14.033069 INFO memsto/alert_rule_cache.go:132 timer: sync rules done, cost: 2ms, number: 27
2023-04-25 10:12:14.033558 INFO memsto/notify_config.go:69 timer: sync wbhooks done number: 0
2023-04-25 10:12:14.033874 INFO memsto/notify_config.go:83 timer: sync smtp:{Host: Port:0 User: Pass: From: InsecureSkipVerify:false Batch:0} done
2023-04-25 10:12:14.034217 INFO memsto/notify_config.go:97 timer: sync notify script done
2023-04-25 10:12:14.034714 INFO memsto/notify_config.go:111 timer: sync ibex done
2023-04-25 10:12:14.035573 INFO memsto/user_cache.go:158 timer: sync users done, cost: 0ms, number: 9
2023-04-25 10:12:14.036468 INFO memsto/user_group_cache.go:159 timer: sync user groups done, cost: 0ms, number: 5
2023-04-25 10:12:14.037285 INFO memsto/alert_subscribe_cache.go:157 timer: sync subscribes done, cost: 0ms, number: 0
2023-04-25 10:12:14.038245 INFO memsto/recording_rule_cache.go:133 timer: sync recording rules done, cost: 0ms, number: 0
2023-04-25 10:12:14.038728 DEBUG prom/reader.go:177 setClientFromPromOption: 1, {ClusterName:prometheus_local Url:http://localhost:9090 WriteAddr: BasicAuthUser: BasicAuthPass: Timeout:2000 DialTimeout:10000 MaxIdleConnsPerHost:100 Headers:[]}
2023-04-25 10:12:14.038789 INFO prom/reader.go:88 setClientFromPromOption success: 1
2023-04-25 10:12:14.045857 INFO naming/hashring.go:38 hash ring 1 rebuild [n9e.novalocal:17000]
2023-04-25 10:12:14.049636 INFO naming/hashring.go:38 hash ring 100000 rebuild [n9e.novalocal:17000]
2023-04-25 10:12:14.050029 WARNING sender/email.go:108 SMTP configurations invalid
http server listening on: 0.0.0.0:17000
2023-04-25 10:12:23.027795 DEBUG memsto/busi_group_cache.go:93 busi_group not changed
2023-04-25 10:12:23.030033 DEBUG memsto/datasource_cache.go:92 datasource not changed
2023-04-25 10:12:23.031015 INFO memsto/target_cache.go:163 timer: sync targets done, cost: 2ms, number: 97
2023-04-25 10:12:23.032933 DEBUG memsto/alert_mute_cache.go:117 alert mutes not changed
2023-04-25 10:12:23.034739 DEBUG memsto/alert_rule_cache.go:113 alert rules not changed
2023-04-25 10:12:23.036297 INFO memsto/notify_config.go:69 timer: sync wbhooks done number: 0
2023-04-25 10:12:23.036411 DEBUG memsto/user_cache.go:138 users not changed
2023-04-25 10:12:23.036603 INFO memsto/notify_config.go:83 timer: sync smtp:{Host: Port:0 User: Pass: From: InsecureSkipVerify:false Batch:0} done
2023-04-25 10:12:23.036892 DEBUG memsto/user_group_cache.go:120 user_group not changed
2023-04-25 10:12:23.037014 INFO memsto/notify_config.go:97 timer: sync notify script done
2023-04-25 10:12:23.037444 INFO memsto/notify_config.go:111 timer: sync ibex done
2023-04-25 10:12:23.037832 DEBUG memsto/alert_subscribe_cache.go:119 alert subscribes not changed
2023-04-25 10:12:23.039636 DEBUG memsto/recording_rule_cache.go:114 recoding rules not changed
2023-04-25 10:12:23.051455 DEBUG naming/hashring.go:55 datasource id:0 pk:30 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051545 DEBUG naming/hashring.go:55 datasource id:0 pk:28 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051563 DEBUG naming/hashring.go:55 datasource id:0 pk:27 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051590 DEBUG naming/hashring.go:55 datasource id:0 pk:29 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051632 DEBUG naming/hashring.go:55 datasource id:0 pk:26 failed to get node from hashring:empty circle
2023-04-25 10:12:23.053531 INFO eval/eval.go:67 eval:alert-1-33 started
2023-04-25 10:12:23.055048 INFO eval/eval.go:67 eval:alert-1-36 started
2023-04-25 10:12:23.055837 INFO eval/eval.go:67 eval:alert-1-30 started
2023-04-25 10:12:23.056697 DEBUG eval/eval.go:162 rule_eval:alert-1-33 query:{PromQl:round((time() - cloud_application_timeout{ident="prod", state="Running"})/60) > 720  unless on(sessionID) (cloud_application_timeout{ident="prod", state="Terminated"}) Severity:3}, value:
2023-04-25 10:12:23.056881 DEBUG eval/eval.go:162 rule_eval:alert-1-36 query:{PromQl:jn_psp{type="vis"} != 1 Severity:1}, value:
2023-04-25 10:12:23.057066 INFO eval/eval.go:67 eval:alert-1-5 started
2023-04-25 10:12:23.057485 INFO eval/eval.go:67 eval:alert-1-2 started
2023-04-25 10:12:23.057892 INFO eval/eval.go:67 eval:alert-1-15 started
2023-04-25 10:12:23.058670 INFO eval/eval.go:67 eval:alert-0-35 started
2023-04-25 10:12:23.059607 INFO eval/eval.go:67 eval:alert-1-32 started
2023-04-25 10:12:23.061526 INFO eval/eval.go:67 eval:alert-1-8 started
2023-04-25 10:12:23.061635 DEBUG eval/eval.go:162 rule_eval:alert-1-15 query:{PromQl:service_status != 1 Severity:3}, value:
2023-04-25 10:12:23.062335 INFO eval/eval.go:67 eval:alert-1-14 started
2023-04-25 10:12:23.063092 DEBUG eval/eval.go:162 rule_eval:alert-1-5 query:{PromQl:ping_result_code != 0 Severity:1}, value:
2023-04-25 10:12:23.066585 DEBUG eval/eval.go:162 rule_eval:alert-1-14 query:{PromQl:disk_used_percent > 98 Severity:1}, value:
2023-04-25 10:12:23.067744 DEBUG eval/eval.go:162 rule_eval:alert-1-8 query:{PromQl:mem_available_percent < 5 Severity:1}, value:
2023-04-25 10:12:23.067840 INFO eval/eval.go:67 eval:alert-1-3 started
2023-04-25 10:12:23.068613 DEBUG eval/eval.go:162 rule_eval:alert-1-32 query:{PromQl:round((time() - cloud_application_timeout{ident="test", state="Running"})/60) > 10  unless on(sessionID) (cloud_application_timeout{ident="test", state="Terminated"}) Severity:2}, value:
2023-04-25 10:12:23.070987 INFO eval/eval.go:67 eval:alert-1-26 started
2023-04-25 10:12:23.072731 DEBUG eval/eval.go:162 rule_eval:alert-1-3 query:{PromQl:service_status != 1 Severity:1}, value:
2023-04-25 10:12:23.072834 INFO eval/eval.go:67 eval:alert-1-34 started
2023-04-25 10:12:23.073374 INFO eval/eval.go:67 eval:alert-0-11 started
2023-04-25 10:12:23.073781 DEBUG eval/eval.go:162 rule_eval:alert-1-2 query:{PromQl:disk_used{fstype=~"ext4|xfs|fuse.juicefs"}/disk_total{fstype=~"ext4|xfs|fuse.juicefs"} * 100 >= 90 Severity:1}, value:
2023-04-25 10:12:23.074869 DEBUG eval/eval.go:162 rule_eval:alert-1-34 query:{PromQl:disk_used_percent{location="jn-psp"}> 90 Severity:2}, value:
2023-04-25 10:12:23.076380 INFO eval/eval.go:67 eval:alert-0-7 started
2023-04-25 10:12:23.076991 INFO eval/eval.go:67 eval:alert-1-20 started
2023-04-25 10:12:23.077590 INFO eval/eval.go:67 eval:alert-1-6 started
2023-04-25 10:12:23.079768 DEBUG eval/eval.go:162 rule_eval:alert-1-30 query:{PromQl:round((time() - job_state_duration{ident="dev", state="Completed"})/60) > 2  unless on(jobID) job_state_duration{state="FINISHED"} Severity:2}, value:
2023-04-25 10:12:23.081729 INFO eval/eval.go:67 eval:alert-1-25 started
2023-04-25 10:12:23.081774 DEBUG eval/eval.go:162 rule_eval:alert-1-6 query:{PromQl:(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 95 Severity:1}, value:
2023-04-25 10:12:23.087491 INFO eval/eval.go:67 eval:alert-1-19 started
2023-04-25 10:12:23.089491 DEBUG eval/eval.go:162 rule_eval:alert-1-26 query:{PromQl:round((time() - job_state_duration{ident="dev", state="SCJobPending"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="SCJobRunning"} or job_state_duration{ident="dev", state="Failed"} or job_state_duration{ident="dev", state="Cancelled"}) Severity:2}, value:{ident="dev", instance="http://localhost:24231/metrics", jobID="4Q3MVrqhkUL", state="SCJobPending", unixTime="1682327575"} => 1019 @[1682388743.076]
2023-04-25 10:12:23.089615 DEBUG process/process.go:288 rule_eval:alert-1-26 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:6 GroupName:dev_cloud Hash:81cd620c22c6b6baa43a5d1f0ffd37ff RuleId:26 RuleName:dev-作业排队超过2分钟!!! RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"dev\", state=\"SCJobPending\"})/60) \u003e 2 unless on(jobID) (job_state_duration{ident=\"dev\", state=\"SCJobRunning\"} or job_state_duration{ident=\"dev\", state=\"Failed\"} or job_state_duration{ident=\"dev\", state=\"Cancelled\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="dev", state="SCJobPending"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="SCJobRunning"} or job_state_duration{ident="dev", state="Failed"} or job_state_duration{ident="dev", state="Cancelled"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:0 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:3 NotifyGroupsJSON:[3] NotifyGroupsObj:[] TargetIdent:dev TargetNote: TriggerTime:1682388743 TriggerValue:1019 Tags:ident=dev,,instance=http://localhost:24231/metrics,,jobID=4Q3MVrqhkUL,,rulename=dev-作业排队超过2分钟!!!,,state=SCJobPending,,unixTime=1682327575 TagsJSON:[ident=dev instance=http://localhost:24231/metrics jobID=4Q3MVrqhkUL rulename=dev-作业排队超过2分钟!!! state=SCJobPending unixTime=1682327575] TagsMap:map[ident:dev instance:http://localhost:24231/metrics jobID:4Q3MVrqhkUL rulename:dev-作业排队超过2分钟!!! state:SCJobPending unixTime:1682327575] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} fire
2023-04-25 10:12:23.089713 DEBUG process/process.go:309 rule_eval:alert-1-26 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:6 GroupName:dev_cloud Hash:81cd620c22c6b6baa43a5d1f0ffd37ff RuleId:26 RuleName:dev-作业排队超过2分钟!!! RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"dev\", state=\"SCJobPending\"})/60) \u003e 2 unless on(jobID) (job_state_duration{ident=\"dev\", state=\"SCJobRunning\"} or job_state_duration{ident=\"dev\", state=\"Failed\"} or job_state_duration{ident=\"dev\", state=\"Cancelled\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="dev", state="SCJobPending"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="SCJobRunning"} or job_state_duration{ident="dev", state="Failed"} or job_state_duration{ident="dev", state="Cancelled"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:0 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:3 NotifyGroupsJSON:[3] NotifyGroupsObj:[] TargetIdent:dev TargetNote: TriggerTime:1682388743 TriggerValue:1019 Tags:ident=dev,,instance=http://localhost:24231/metrics,,jobID=4Q3MVrqhkUL,,rulename=dev-作业排队超过2分钟!!!,,state=SCJobPending,,unixTime=1682327575 TagsJSON:[ident=dev instance=http://localhost:24231/metrics jobID=4Q3MVrqhkUL rulename=dev-作业排队超过2分钟!!! state=SCJobPending unixTime=1682327575] TagsMap:map[ident:dev instance:http://localhost:24231/metrics jobID:4Q3MVrqhkUL rulename:dev-作业排队超过2分钟!!! state:SCJobPending unixTime:1682327575] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} reach max number
2023-04-25 10:12:23.090714 INFO eval/eval.go:67 eval:alert-1-28 started
2023-04-25 10:12:23.094445 DEBUG eval/eval.go:162 rule_eval:alert-1-28 query:{PromQl:round((time() - job_state_duration{ident="dev", state="SCJobRunning"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="Completed"} or job_state_duration{ident="dev", state="Failed"}or job_state_duration{ident="dev", state="Cancelled"}) Severity:2}, value:
2023-04-25 10:12:23.094776 INFO eval/eval.go:67 eval:alert-1-27 started
2023-04-25 10:12:23.095436 INFO eval/eval.go:67 eval:alert-1-12 started
2023-04-25 10:12:23.096452 INFO eval/eval.go:67 eval:alert-1-29 started
2023-04-25 10:12:23.102633 INFO eval/eval.go:67 eval:alert-1-13 started
2023-04-25 10:12:23.102666 DEBUG eval/eval.go:162 rule_eval:alert-1-12 query:{PromQl:mem_available_percent < 5 Severity:1}, value:
2023-04-25 10:12:23.103239 INFO eval/eval.go:67 eval:alert-1-24 started
2023-04-25 10:12:23.103885 DEBUG eval/eval.go:162 rule_eval:alert-1-25 query:{PromQl:round((time() - job_state_duration{ident="prod", state="SCJobPending"})/60) > 30 unless on(jobID) (job_state_duration{ident="prod", state="SCJobRunning"} or job_state_duration{ident="prod", state="Failed"} or job_state_duration{ident="prod", state="Completed"}) Severity:2}, value:{ident="prod", instance="localhost:24231", jobID="4Q45wXGjFk7", state="SCJobPending", unixTime="1682067108"} => 5361 @[1682388743.082]
2023-04-25 10:12:23.103954 DEBUG process/process.go:288 rule_eval:alert-1-25 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:2 GroupName:远算云 Hash:08eda5fa773ce43b1e5bfd47c60663a8 RuleId:25 RuleName:prod-作业排队超过30分钟 RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"prod\", state=\"SCJobPending\"})/60) \u003e 30 unless on(jobID) (job_state_duration{ident=\"prod\", state=\"SCJobRunning\"} or job_state_duration{ident=\"prod\", state=\"Failed\"} or job_state_duration{ident=\"prod\", state=\"Completed\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="prod", state="SCJobPending"})/60) > 30 unless on(jobID) (job_state_duration{ident="prod", state="SCJobRunning"} or job_state_duration{ident="prod", state="Failed"} or job_state_duration{ident="prod", state="Completed"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:1 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:2 NotifyGroupsJSON:[2] NotifyGroupsObj:[] TargetIdent:prod TargetNote: TriggerTime:1682388743 TriggerValue:5361 Tags:ident=prod,,instance=localhost:24231,,jobID=4Q45wXGjFk7,,rulename=prod-作业排队超过30分钟,,state=SCJobPending,,unixTime=1682067108 TagsJSON:[ident=prod instance=localhost:24231 jobID=4Q45wXGjFk7 rulename=prod-作业排队超过30分钟 state=SCJobPending unixTime=1682067108] TagsMap:map[ident:prod instance:localhost:24231 jobID:4Q45wXGjFk7 rulename:prod-作业排队超过30分钟 state:SCJobPending unixTime:1682067108] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} fire
2023-04-25 10:12:23.104034 DEBUG process/process.go:309 rule_eval:alert-1-25 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:2 GroupName:远算云 Hash:08eda5fa773ce43b1e5bfd47c60663a8 RuleId:25 RuleName:prod-作业排队超过30分钟 RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"prod\", state=\"SCJobPending\"})/60) \u003e 30 unless on(jobID) (job_state_duration{ident=\"prod\", state=\"SCJobRunning\"} or job_state_duration{ident=\"prod\", state=\"Failed\"} or job_state_duration{ident=\"prod\", state=\"Completed\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="prod", state="SCJobPending"})/60) > 30 unless on(jobID) (job_state_duration{ident="prod", state="SCJobRunning"} or job_state_duration{ident="prod", state="Failed"} or job_state_duration{ident="prod", state="Completed"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:1 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:2 NotifyGroupsJSON:[2] NotifyGroupsObj:[] TargetIdent:prod TargetNote: TriggerTime:1682388743 TriggerValue:5361 Tags:ident=prod,,instance=localhost:24231,,jobID=4Q45wXGjFk7,,rulename=prod-作业排队超过30分钟,,state=SCJobPending,,unixTime=1682067108 TagsJSON:[ident=prod instance=localhost:24231 jobID=4Q45wXGjFk7 rulename=prod-作业排队超过30分钟 state=SCJobPending unixTime=1682067108] TagsMap:map[ident:prod instance:localhost:24231 jobID:4Q45wXGjFk7 rulename:prod-作业排队超过30分钟 state:SCJobPending unixTime:1682067108] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} reach max number
2023-04-25 10:12:23.104590 INFO eval/eval.go:67 eval:alert-1-37 started
2023-04-25 10:12:23.105332 INFO eval/eval.go:67 eval:alert-1-9 started
...............
...............

2023/04/25 10:14:18 /Users/ning/gopath/src/github.com/ccfos/nightingale/pushgw/idents/idents.go:90 SLOW SQL >= 200ms
[201.727ms] [rows:42] UPDATE `target` SET `update_at`=1682388858 WHERE ident in ('k8sdev-categraf-n9e-fwnv7','zjy-jump_10.10.0.201','dev-new-worker3','cn04_172.16.1.8','gwcaedemo-1.117.192.82','dev-new-master1','k8sdev-categraf-n9e-kvkms','phab-10.0.6.91','beegfs01_172.16.1.251','gw-store-prod-43.142.132.111','wiki-10.0.7.243','mztest3-115.159.5.26','ipsec-123.206.114.26','dev-new.yuansuan.cn','kw-test_172.17.64.6','swys-192.168.216.251','vis_gn02_1_test','n9e-10.0.6.2','compute03_192.168.11.13','rancher-110.40.154.185','vis-test-81.68.123.110','kw-nexus_172.17.64.14','mz2-122.51.90.49','compute11_192.168.11.21','vis_gn01_1','vis_gn02_2','k8sdev-categraf-n9e-4m9lr','dev','k8sdev-categraf-n9e-6s4sh','compute09_192.168.11.19','cn01_172.16.1.1','compute02_192.168.11.12','jn-k8s-n9e-agent-zerotier-z2hw4','compute04_192.168.11.14','compute12_192.168.11.22','mz1-122.51.91.220','testsc-111.229.185.237','gw-store-dev-1.15.239.253','mz3-49.235.127.137','cn03_172.16.1.7','vis_gn03_1','vis_gn01_2')
2023-04-25 10:14:20.047311 DEBUG memsto/busi_group_cache.go:93 busi_group not changed
2023-04-25 10:14:20.047350 DEBUG memsto/datasource_cache.go:92 datasource not changed
2023-04-25 10:14:20.051482 DEBUG memsto/alert_rule_cache.go:113 alert rules not changed
2023-04-25 10:14:20.052877 DEBUG memsto/recording_rule_cache.go:114 recoding rules not changed
2023-04-25 10:14:20.053624 DEBUG memsto/user_cache.go:138 users not changed
2023-04-25 10:14:20.053653 DEBUG memsto/user_group_cache.go:120 user_group not changed
2023-04-25 10:14:20.053664 DEBUG memsto/alert_subscribe_cache.go:119 alert subscribes not changed
2023-04-25 10:14:20.076475 DEBUG memsto/alert_mute_cache.go:117 alert mutes not changed
2023-04-25 10:14:20.077441 INFO memsto/target_cache.go:163 timer: sync targets done, cost: 1ms, number: 97
2023-04-25 10:14:20.086269 INFO memsto/notify_config.go:69 timer: sync wbhooks done number: 0
2023-04-25 10:14:20.086685 INFO memsto/notify_config.go:83 timer: sync smtp:{Host: Port:0 User: Pass: From: InsecureSkipVerify:false Batch:0} done
2023-04-25 10:14:20.087037 INFO memsto/notify_config.go:97 timer: sync notify script done
2023-04-25 10:14:20.087429 INFO memsto/notify_config.go:111 timer: sync ibex done
2023-04-25 10:14:20.116576 DEBUG naming/hashring.go:55 datasource id:0 pk:28 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116667 DEBUG naming/hashring.go:55 datasource id:0 pk:27 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116722 DEBUG naming/hashring.go:55 datasource id:0 pk:29 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116782 DEBUG naming/hashring.go:55 datasource id:0 pk:26 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116802 DEBUG naming/hashring.go:55 datasource id:0 pk:30 failed to get node from hashring:empty circle
2023-04-25 10:14:23.069018 DEBUG eval/eval.go:162 rule_eval:alert-1-36 query:{PromQl:jn_psp{type="vis"} != 1 Severity:1}, value:
2023-04-25 10:14:23.069242 DEBUG eval/eval.go:162 rule_eval:alert-1-33 query:{PromQl:round((time() - cloud_application_timeout{ident="prod", state="Running"})/60) > 720  unless on(sessionID) (cloud_application_timeout{ident="prod", state="Terminated"}) Severity:3}, value:
2023-04-25 10:14:23.080703 DEBUG eval/eval.go:162 rule_eval:alert-1-15 query:{PromQl:service_status != 1 Severity:3}, value:
2023-04-25 10:14:23.082724 DEBUG eval/eval.go:162 rule_eval:alert-1-8 query:{PromQl:mem_available_percent < 5 Severity:1}, value:
2023-04-25 10:14:23.087592 DEBUG eval/eval.go:162 rule_eval:alert-1-32 query:{PromQl:round((time() - cloud_application_timeout{ident="test", state="Running"})/60) > 10  unless on(sessionID) (cloud_application_timeout{ident="test", state="Terminated"}) Severity:2}, value:
2023-04-25 10:14:23.094646 DEBUG eval/eval.go:162 rule_eval:alert-1-34 query:{PromQl:disk_used_percent{location="jn-psp"}> 90 Severity:2}, value:
2023-04-25 10:14:23.094763 DEBUG eval/eval.go:162 rule_eval:alert-1-14 query:{PromQl:disk_used_percent > 98 Severity:1}, value:
2023-04-25 10:14:23.094769 DEBUG eval/eval.go:162 rule_eval:alert-1-5 query:{PromQl:ping_result_code != 0 Severity:1}, value:
2023-04-25 10:14:23.095443 DEBUG eval/eval.go:162 rule_eval:alert-1-3 query:{PromQl:service_status != 1 Severity:1}, value:

我补充了我的答案

[Global]
RunMode = "release"


Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "file"
# # rotate by time
KeepHours = 4
# # rotate by size
RotateNum = 3
# # unit: MB
RotateSize = 256

[HTTP]
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 17000
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = false
# whether enable pprof
PProf = false
# expose prometheus /metrics?
ExposeMetrics = true
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120

[HTTP.Pushgw]
Enable = true
# [HTTP.Pushgw.BasicAuth]
# user001 = "ccc26da7b9aba533cbb263a36c07dcc5"

再往下就和日志不相关了还需要吗?

./n9e 执行结果如下

[root@n9e n9e]# ./n9e
runner.cwd: /opt/n9e
runner.hostname: n9e.novalocal
runner.fd_limits: (soft=4096, hard=4096)
runner.vm_limits: (soft=unlimited, hard=unlimited)
2023-04-25 10:12:14.026305 INFO memsto/busi_group_cache.go:108 timer: sync busi groups done, cost: 0ms, number: 7
2023-04-25 10:12:14.028594 INFO memsto/target_cache.go:163 timer: sync targets done, cost: 2ms, number: 97
2023-04-25 10:12:14.029376 INFO memsto/datasource_cache.go:107 timer: sync datasources done, cost: 0ms, number: 1
2023-04-25 10:12:14.030791 INFO memsto/alert_mute_cache.go:142 timer: sync mutes done, cost: 1ms, number: 0
2023-04-25 10:12:14.033069 INFO memsto/alert_rule_cache.go:132 timer: sync rules done, cost: 2ms, number: 27
2023-04-25 10:12:14.033558 INFO memsto/notify_config.go:69 timer: sync wbhooks done number: 0
2023-04-25 10:12:14.033874 INFO memsto/notify_config.go:83 timer: sync smtp:{Host: Port:0 User: Pass: From: InsecureSkipVerify:false Batch:0} done
2023-04-25 10:12:14.034217 INFO memsto/notify_config.go:97 timer: sync notify script done
2023-04-25 10:12:14.034714 INFO memsto/notify_config.go:111 timer: sync ibex done
2023-04-25 10:12:14.035573 INFO memsto/user_cache.go:158 timer: sync users done, cost: 0ms, number: 9
2023-04-25 10:12:14.036468 INFO memsto/user_group_cache.go:159 timer: sync user groups done, cost: 0ms, number: 5
2023-04-25 10:12:14.037285 INFO memsto/alert_subscribe_cache.go:157 timer: sync subscribes done, cost: 0ms, number: 0
2023-04-25 10:12:14.038245 INFO memsto/recording_rule_cache.go:133 timer: sync recording rules done, cost: 0ms, number: 0
2023-04-25 10:12:14.038728 DEBUG prom/reader.go:177 setClientFromPromOption: 1, {ClusterName:prometheus_local Url:http://localhost:9090 WriteAddr: BasicAuthUser: BasicAuthPass: Timeout:2000 DialTimeout:10000 MaxIdleConnsPerHost:100 Headers:[]}
2023-04-25 10:12:14.038789 INFO prom/reader.go:88 setClientFromPromOption success: 1
2023-04-25 10:12:14.045857 INFO naming/hashring.go:38 hash ring 1 rebuild [n9e.novalocal:17000]
2023-04-25 10:12:14.049636 INFO naming/hashring.go:38 hash ring 100000 rebuild [n9e.novalocal:17000]
2023-04-25 10:12:14.050029 WARNING sender/email.go:108 SMTP configurations invalid
http server listening on: 0.0.0.0:17000
2023-04-25 10:12:23.027795 DEBUG memsto/busi_group_cache.go:93 busi_group not changed
2023-04-25 10:12:23.030033 DEBUG memsto/datasource_cache.go:92 datasource not changed
2023-04-25 10:12:23.031015 INFO memsto/target_cache.go:163 timer: sync targets done, cost: 2ms, number: 97
2023-04-25 10:12:23.032933 DEBUG memsto/alert_mute_cache.go:117 alert mutes not changed
2023-04-25 10:12:23.034739 DEBUG memsto/alert_rule_cache.go:113 alert rules not changed
2023-04-25 10:12:23.036297 INFO memsto/notify_config.go:69 timer: sync wbhooks done number: 0
2023-04-25 10:12:23.036411 DEBUG memsto/user_cache.go:138 users not changed
2023-04-25 10:12:23.036603 INFO memsto/notify_config.go:83 timer: sync smtp:{Host: Port:0 User: Pass: From: InsecureSkipVerify:false Batch:0} done
2023-04-25 10:12:23.036892 DEBUG memsto/user_group_cache.go:120 user_group not changed
2023-04-25 10:12:23.037014 INFO memsto/notify_config.go:97 timer: sync notify script done
2023-04-25 10:12:23.037444 INFO memsto/notify_config.go:111 timer: sync ibex done
2023-04-25 10:12:23.037832 DEBUG memsto/alert_subscribe_cache.go:119 alert subscribes not changed
2023-04-25 10:12:23.039636 DEBUG memsto/recording_rule_cache.go:114 recoding rules not changed
2023-04-25 10:12:23.051455 DEBUG naming/hashring.go:55 datasource id:0 pk:30 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051545 DEBUG naming/hashring.go:55 datasource id:0 pk:28 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051563 DEBUG naming/hashring.go:55 datasource id:0 pk:27 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051590 DEBUG naming/hashring.go:55 datasource id:0 pk:29 failed to get node from hashring:empty circle
2023-04-25 10:12:23.051632 DEBUG naming/hashring.go:55 datasource id:0 pk:26 failed to get node from hashring:empty circle
2023-04-25 10:12:23.053531 INFO eval/eval.go:67 eval:alert-1-33 started
2023-04-25 10:12:23.055048 INFO eval/eval.go:67 eval:alert-1-36 started
2023-04-25 10:12:23.055837 INFO eval/eval.go:67 eval:alert-1-30 started
2023-04-25 10:12:23.056697 DEBUG eval/eval.go:162 rule_eval:alert-1-33 query:{PromQl:round((time() - cloud_application_timeout{ident="prod", state="Running"})/60) > 720  unless on(sessionID) (cloud_application_timeout{ident="prod", state="Terminated"}) Severity:3}, value:
2023-04-25 10:12:23.056881 DEBUG eval/eval.go:162 rule_eval:alert-1-36 query:{PromQl:jn_psp{type="vis"} != 1 Severity:1}, value:
2023-04-25 10:12:23.057066 INFO eval/eval.go:67 eval:alert-1-5 started
2023-04-25 10:12:23.057485 INFO eval/eval.go:67 eval:alert-1-2 started
2023-04-25 10:12:23.057892 INFO eval/eval.go:67 eval:alert-1-15 started
2023-04-25 10:12:23.058670 INFO eval/eval.go:67 eval:alert-0-35 started
2023-04-25 10:12:23.059607 INFO eval/eval.go:67 eval:alert-1-32 started
2023-04-25 10:12:23.061526 INFO eval/eval.go:67 eval:alert-1-8 started
2023-04-25 10:12:23.061635 DEBUG eval/eval.go:162 rule_eval:alert-1-15 query:{PromQl:service_status != 1 Severity:3}, value:
2023-04-25 10:12:23.062335 INFO eval/eval.go:67 eval:alert-1-14 started
2023-04-25 10:12:23.063092 DEBUG eval/eval.go:162 rule_eval:alert-1-5 query:{PromQl:ping_result_code != 0 Severity:1}, value:
2023-04-25 10:12:23.066585 DEBUG eval/eval.go:162 rule_eval:alert-1-14 query:{PromQl:disk_used_percent > 98 Severity:1}, value:
2023-04-25 10:12:23.067744 DEBUG eval/eval.go:162 rule_eval:alert-1-8 query:{PromQl:mem_available_percent < 5 Severity:1}, value:
2023-04-25 10:12:23.067840 INFO eval/eval.go:67 eval:alert-1-3 started
2023-04-25 10:12:23.068613 DEBUG eval/eval.go:162 rule_eval:alert-1-32 query:{PromQl:round((time() - cloud_application_timeout{ident="test", state="Running"})/60) > 10  unless on(sessionID) (cloud_application_timeout{ident="test", state="Terminated"}) Severity:2}, value:
2023-04-25 10:12:23.070987 INFO eval/eval.go:67 eval:alert-1-26 started
2023-04-25 10:12:23.072731 DEBUG eval/eval.go:162 rule_eval:alert-1-3 query:{PromQl:service_status != 1 Severity:1}, value:
2023-04-25 10:12:23.072834 INFO eval/eval.go:67 eval:alert-1-34 started
2023-04-25 10:12:23.073374 INFO eval/eval.go:67 eval:alert-0-11 started
2023-04-25 10:12:23.073781 DEBUG eval/eval.go:162 rule_eval:alert-1-2 query:{PromQl:disk_used{fstype=~"ext4|xfs|fuse.juicefs"}/disk_total{fstype=~"ext4|xfs|fuse.juicefs"} * 100 >= 90 Severity:1}, value:
2023-04-25 10:12:23.074869 DEBUG eval/eval.go:162 rule_eval:alert-1-34 query:{PromQl:disk_used_percent{location="jn-psp"}> 90 Severity:2}, value:
2023-04-25 10:12:23.076380 INFO eval/eval.go:67 eval:alert-0-7 started
2023-04-25 10:12:23.076991 INFO eval/eval.go:67 eval:alert-1-20 started
2023-04-25 10:12:23.077590 INFO eval/eval.go:67 eval:alert-1-6 started
2023-04-25 10:12:23.079768 DEBUG eval/eval.go:162 rule_eval:alert-1-30 query:{PromQl:round((time() - job_state_duration{ident="dev", state="Completed"})/60) > 2  unless on(jobID) job_state_duration{state="FINISHED"} Severity:2}, value:
2023-04-25 10:12:23.081729 INFO eval/eval.go:67 eval:alert-1-25 started
2023-04-25 10:12:23.081774 DEBUG eval/eval.go:162 rule_eval:alert-1-6 query:{PromQl:(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - (node_memory_Cached_bytes + node_memory_Buffers_bytes))/node_memory_MemTotal_bytes*100 > 95 Severity:1}, value:
2023-04-25 10:12:23.087491 INFO eval/eval.go:67 eval:alert-1-19 started
2023-04-25 10:12:23.089491 DEBUG eval/eval.go:162 rule_eval:alert-1-26 query:{PromQl:round((time() - job_state_duration{ident="dev", state="SCJobPending"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="SCJobRunning"} or job_state_duration{ident="dev", state="Failed"} or job_state_duration{ident="dev", state="Cancelled"}) Severity:2}, value:{ident="dev", instance="http://localhost:24231/metrics", jobID="4Q3MVrqhkUL", state="SCJobPending", unixTime="1682327575"} => 1019 @[1682388743.076]
2023-04-25 10:12:23.089615 DEBUG process/process.go:288 rule_eval:alert-1-26 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:6 GroupName:dev_cloud Hash:81cd620c22c6b6baa43a5d1f0ffd37ff RuleId:26 RuleName:dev-作业排队超过2分钟!!! RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"dev\", state=\"SCJobPending\"})/60) \u003e 2 unless on(jobID) (job_state_duration{ident=\"dev\", state=\"SCJobRunning\"} or job_state_duration{ident=\"dev\", state=\"Failed\"} or job_state_duration{ident=\"dev\", state=\"Cancelled\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="dev", state="SCJobPending"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="SCJobRunning"} or job_state_duration{ident="dev", state="Failed"} or job_state_duration{ident="dev", state="Cancelled"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:0 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:3 NotifyGroupsJSON:[3] NotifyGroupsObj:[] TargetIdent:dev TargetNote: TriggerTime:1682388743 TriggerValue:1019 Tags:ident=dev,,instance=http://localhost:24231/metrics,,jobID=4Q3MVrqhkUL,,rulename=dev-作业排队超过2分钟!!!,,state=SCJobPending,,unixTime=1682327575 TagsJSON:[ident=dev instance=http://localhost:24231/metrics jobID=4Q3MVrqhkUL rulename=dev-作业排队超过2分钟!!! state=SCJobPending unixTime=1682327575] TagsMap:map[ident:dev instance:http://localhost:24231/metrics jobID:4Q3MVrqhkUL rulename:dev-作业排队超过2分钟!!! state:SCJobPending unixTime:1682327575] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} fire
2023-04-25 10:12:23.089713 DEBUG process/process.go:309 rule_eval:alert-1-26 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:6 GroupName:dev_cloud Hash:81cd620c22c6b6baa43a5d1f0ffd37ff RuleId:26 RuleName:dev-作业排队超过2分钟!!! RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"dev\", state=\"SCJobPending\"})/60) \u003e 2 unless on(jobID) (job_state_duration{ident=\"dev\", state=\"SCJobRunning\"} or job_state_duration{ident=\"dev\", state=\"Failed\"} or job_state_duration{ident=\"dev\", state=\"Cancelled\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="dev", state="SCJobPending"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="SCJobRunning"} or job_state_duration{ident="dev", state="Failed"} or job_state_duration{ident="dev", state="Cancelled"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:0 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:3 NotifyGroupsJSON:[3] NotifyGroupsObj:[] TargetIdent:dev TargetNote: TriggerTime:1682388743 TriggerValue:1019 Tags:ident=dev,,instance=http://localhost:24231/metrics,,jobID=4Q3MVrqhkUL,,rulename=dev-作业排队超过2分钟!!!,,state=SCJobPending,,unixTime=1682327575 TagsJSON:[ident=dev instance=http://localhost:24231/metrics jobID=4Q3MVrqhkUL rulename=dev-作业排队超过2分钟!!! state=SCJobPending unixTime=1682327575] TagsMap:map[ident:dev instance:http://localhost:24231/metrics jobID:4Q3MVrqhkUL rulename:dev-作业排队超过2分钟!!! state:SCJobPending unixTime:1682327575] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} reach max number
2023-04-25 10:12:23.090714 INFO eval/eval.go:67 eval:alert-1-28 started
2023-04-25 10:12:23.094445 DEBUG eval/eval.go:162 rule_eval:alert-1-28 query:{PromQl:round((time() - job_state_duration{ident="dev", state="SCJobRunning"})/60) > 2 unless on(jobID) (job_state_duration{ident="dev", state="Completed"} or job_state_duration{ident="dev", state="Failed"}or job_state_duration{ident="dev", state="Cancelled"}) Severity:2}, value:
2023-04-25 10:12:23.094776 INFO eval/eval.go:67 eval:alert-1-27 started
2023-04-25 10:12:23.095436 INFO eval/eval.go:67 eval:alert-1-12 started
2023-04-25 10:12:23.096452 INFO eval/eval.go:67 eval:alert-1-29 started
2023-04-25 10:12:23.102633 INFO eval/eval.go:67 eval:alert-1-13 started
2023-04-25 10:12:23.102666 DEBUG eval/eval.go:162 rule_eval:alert-1-12 query:{PromQl:mem_available_percent < 5 Severity:1}, value:
2023-04-25 10:12:23.103239 INFO eval/eval.go:67 eval:alert-1-24 started
2023-04-25 10:12:23.103885 DEBUG eval/eval.go:162 rule_eval:alert-1-25 query:{PromQl:round((time() - job_state_duration{ident="prod", state="SCJobPending"})/60) > 30 unless on(jobID) (job_state_duration{ident="prod", state="SCJobRunning"} or job_state_duration{ident="prod", state="Failed"} or job_state_duration{ident="prod", state="Completed"}) Severity:2}, value:{ident="prod", instance="localhost:24231", jobID="4Q45wXGjFk7", state="SCJobPending", unixTime="1682067108"} => 5361 @[1682388743.082]
2023-04-25 10:12:23.103954 DEBUG process/process.go:288 rule_eval:alert-1-25 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:2 GroupName:远算云 Hash:08eda5fa773ce43b1e5bfd47c60663a8 RuleId:25 RuleName:prod-作业排队超过30分钟 RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"prod\", state=\"SCJobPending\"})/60) \u003e 30 unless on(jobID) (job_state_duration{ident=\"prod\", state=\"SCJobRunning\"} or job_state_duration{ident=\"prod\", state=\"Failed\"} or job_state_duration{ident=\"prod\", state=\"Completed\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="prod", state="SCJobPending"})/60) > 30 unless on(jobID) (job_state_duration{ident="prod", state="SCJobRunning"} or job_state_duration{ident="prod", state="Failed"} or job_state_duration{ident="prod", state="Completed"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:1 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:2 NotifyGroupsJSON:[2] NotifyGroupsObj:[] TargetIdent:prod TargetNote: TriggerTime:1682388743 TriggerValue:5361 Tags:ident=prod,,instance=localhost:24231,,jobID=4Q45wXGjFk7,,rulename=prod-作业排队超过30分钟,,state=SCJobPending,,unixTime=1682067108 TagsJSON:[ident=prod instance=localhost:24231 jobID=4Q45wXGjFk7 rulename=prod-作业排队超过30分钟 state=SCJobPending unixTime=1682067108] TagsMap:map[ident:prod instance:localhost:24231 jobID:4Q45wXGjFk7 rulename:prod-作业排队超过30分钟 state:SCJobPending unixTime:1682067108] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} fire
2023-04-25 10:12:23.104034 DEBUG process/process.go:309 rule_eval:alert-1-25 event:&{Id:0 Cate:prometheus Cluster:prometheus_local DatasourceId:1 GroupId:2 GroupName:远算云 Hash:08eda5fa773ce43b1e5bfd47c60663a8 RuleId:25 RuleName:prod-作业排队超过30分钟 RuleNote: RuleProd:metric RuleAlgo: Severity:2 PromForDuration:60 PromQl: RuleConfig:{"queries":[{"prom_ql":"round((time() - job_state_duration{ident=\"prod\", state=\"SCJobPending\"})/60) \u003e 30 unless on(jobID) (job_state_duration{ident=\"prod\", state=\"SCJobRunning\"} or job_state_duration{ident=\"prod\", state=\"Failed\"} or job_state_duration{ident=\"prod\", state=\"Completed\"})","severity":2}]} RuleConfigJson:map[queries:[map[prom_ql:round((time() - job_state_duration{ident="prod", state="SCJobPending"})/60) > 30 unless on(jobID) (job_state_duration{ident="prod", state="SCJobRunning"} or job_state_duration{ident="prod", state="Failed"} or job_state_duration{ident="prod", state="Completed"}) severity:2]]] PromEvalInterval:60 Callbacks: CallbacksJSON:[] RunbookUrl: NotifyRecovered:1 NotifyChannels:wecom NotifyChannelsJSON:[wecom] NotifyGroups:2 NotifyGroupsJSON:[2] NotifyGroupsObj:[] TargetIdent:prod TargetNote: TriggerTime:1682388743 TriggerValue:5361 Tags:ident=prod,,instance=localhost:24231,,jobID=4Q45wXGjFk7,,rulename=prod-作业排队超过30分钟,,state=SCJobPending,,unixTime=1682067108 TagsJSON:[ident=prod instance=localhost:24231 jobID=4Q45wXGjFk7 rulename=prod-作业排队超过30分钟 state=SCJobPending unixTime=1682067108] TagsMap:map[ident:prod instance:localhost:24231 jobID:4Q45wXGjFk7 rulename:prod-作业排队超过30分钟 state:SCJobPending unixTime:1682067108] Annotations:{} AnnotationsJSON:map[] IsRecovered:false NotifyUsersObj:[] LastEvalTime:1682388743 LastSentTime:0 NotifyCurNumber:0 FirstTriggerTime:0} reach max number
2023-04-25 10:12:23.104590 INFO eval/eval.go:67 eval:alert-1-37 started
2023-04-25 10:12:23.105332 INFO eval/eval.go:67 eval:alert-1-9 started
...............
...............

2023/04/25 10:14:18 /Users/ning/gopath/src/github.com/ccfos/nightingale/pushgw/idents/idents.go:90 SLOW SQL >= 200ms
[201.727ms] [rows:42] UPDATE `target` SET `update_at`=1682388858 WHERE ident in ('k8sdev-categraf-n9e-fwnv7','zjy-jump_10.10.0.201','dev-new-worker3','cn04_172.16.1.8','gwcaedemo-1.117.192.82','dev-new-master1','k8sdev-categraf-n9e-kvkms','phab-10.0.6.91','beegfs01_172.16.1.251','gw-store-prod-43.142.132.111','wiki-10.0.7.243','mztest3-115.159.5.26','ipsec-123.206.114.26','dev-new.yuansuan.cn','kw-test_172.17.64.6','swys-192.168.216.251','vis_gn02_1_test','n9e-10.0.6.2','compute03_192.168.11.13','rancher-110.40.154.185','vis-test-81.68.123.110','kw-nexus_172.17.64.14','mz2-122.51.90.49','compute11_192.168.11.21','vis_gn01_1','vis_gn02_2','k8sdev-categraf-n9e-4m9lr','dev','k8sdev-categraf-n9e-6s4sh','compute09_192.168.11.19','cn01_172.16.1.1','compute02_192.168.11.12','jn-k8s-n9e-agent-zerotier-z2hw4','compute04_192.168.11.14','compute12_192.168.11.22','mz1-122.51.91.220','testsc-111.229.185.237','gw-store-dev-1.15.239.253','mz3-49.235.127.137','cn03_172.16.1.7','vis_gn03_1','vis_gn01_2')
2023-04-25 10:14:20.047311 DEBUG memsto/busi_group_cache.go:93 busi_group not changed
2023-04-25 10:14:20.047350 DEBUG memsto/datasource_cache.go:92 datasource not changed
2023-04-25 10:14:20.051482 DEBUG memsto/alert_rule_cache.go:113 alert rules not changed
2023-04-25 10:14:20.052877 DEBUG memsto/recording_rule_cache.go:114 recoding rules not changed
2023-04-25 10:14:20.053624 DEBUG memsto/user_cache.go:138 users not changed
2023-04-25 10:14:20.053653 DEBUG memsto/user_group_cache.go:120 user_group not changed
2023-04-25 10:14:20.053664 DEBUG memsto/alert_subscribe_cache.go:119 alert subscribes not changed
2023-04-25 10:14:20.076475 DEBUG memsto/alert_mute_cache.go:117 alert mutes not changed
2023-04-25 10:14:20.077441 INFO memsto/target_cache.go:163 timer: sync targets done, cost: 1ms, number: 97
2023-04-25 10:14:20.086269 INFO memsto/notify_config.go:69 timer: sync wbhooks done number: 0
2023-04-25 10:14:20.086685 INFO memsto/notify_config.go:83 timer: sync smtp:{Host: Port:0 User: Pass: From: InsecureSkipVerify:false Batch:0} done
2023-04-25 10:14:20.087037 INFO memsto/notify_config.go:97 timer: sync notify script done
2023-04-25 10:14:20.087429 INFO memsto/notify_config.go:111 timer: sync ibex done
2023-04-25 10:14:20.116576 DEBUG naming/hashring.go:55 datasource id:0 pk:28 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116667 DEBUG naming/hashring.go:55 datasource id:0 pk:27 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116722 DEBUG naming/hashring.go:55 datasource id:0 pk:29 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116782 DEBUG naming/hashring.go:55 datasource id:0 pk:26 failed to get node from hashring:empty circle
2023-04-25 10:14:20.116802 DEBUG naming/hashring.go:55 datasource id:0 pk:30 failed to get node from hashring:empty circle
2023-04-25 10:14:23.069018 DEBUG eval/eval.go:162 rule_eval:alert-1-36 query:{PromQl:jn_psp{type="vis"} != 1 Severity:1}, value:
2023-04-25 10:14:23.069242 DEBUG eval/eval.go:162 rule_eval:alert-1-33 query:{PromQl:round((time() - cloud_application_timeout{ident="prod", state="Running"})/60) > 720  unless on(sessionID) (cloud_application_timeout{ident="prod", state="Terminated"}) Severity:3}, value:
2023-04-25 10:14:23.080703 DEBUG eval/eval.go:162 rule_eval:alert-1-15 query:{PromQl:service_status != 1 Severity:3}, value:
2023-04-25 10:14:23.082724 DEBUG eval/eval.go:162 rule_eval:alert-1-8 query:{PromQl:mem_available_percent < 5 Severity:1}, value:
2023-04-25 10:14:23.087592 DEBUG eval/eval.go:162 rule_eval:alert-1-32 query:{PromQl:round((time() - cloud_application_timeout{ident="test", state="Running"})/60) > 10  unless on(sessionID) (cloud_application_timeout{ident="test", state="Terminated"}) Severity:2}, value:
2023-04-25 10:14:23.094646 DEBUG eval/eval.go:162 rule_eval:alert-1-34 query:{PromQl:disk_used_percent{location="jn-psp"}> 90 Severity:2}, value:
2023-04-25 10:14:23.094763 DEBUG eval/eval.go:162 rule_eval:alert-1-14 query:{PromQl:disk_used_percent > 98 Severity:1}, value:
2023-04-25 10:14:23.094769 DEBUG eval/eval.go:162 rule_eval:alert-1-5 query:{PromQl:ping_result_code != 0 Severity:1}, value:
2023-04-25 10:14:23.095443 DEBUG eval/eval.go:162 rule_eval:alert-1-3 query:{PromQl:service_status != 1 Severity:1}, value: