问题:采用 n9e-edge 边缘机房模式后,被监控的后端主机显示为无法连通;反复刷新页面时,所有主机会瞬间变为绿色在线状态。请问应如何排查处理?
排查:
(1)被监控主机到 n9e-edge 的网络正常,可以采集到监控数据并通过面板展示;
(2)n9e-edge 到 n9e 中心端的网络正常(telnet 可通),数据可以正常采集并展示。
请各位前辈协助提供思路,谢谢。
问题:采用 n9e-edge 边缘机房模式后,被监控的后端主机显示为无法连通;反复刷新页面时,所有主机会瞬间变为绿色在线状态。请问应如何排查处理?
排查:
(1)被监控主机到 n9e-edge 的网络正常,可以采集到监控数据并通过面板展示;
(2)n9e-edge 到 n9e 中心端的网络正常(telnet 可通),数据可以正常采集并展示。
请各位前辈协助提供思路,谢谢。
n9e-edge
日志信息
配置信息:
[root@monitor n9e]# cat etc/edge/edge.toml
[Global]
# Runtime mode of the process; this deployment uses "release".
RunMode = "release"
[CenterApi]
# Address list of the n9e center API that this edge instance connects to.
# NOTE(review): the address uses plain http:// while basic-auth credentials
# are configured below; presumably they travel unencrypted — confirm whether
# the center can be reached over https instead.
Addrs = [
    "http://202.xx.xx.xx:19191",
]
# Basic-auth credentials presented to the center.
# NOTE(review): presumably these must match an entry in the center's own
# [HTTP.APIForService.BasicAuth] table — confirm against the center config.
BasicAuthUser = "xx"
BasicAuthPass = "xxyy"
Timeout = 9000  # ms
[Log]
# Destination of log output: stdout, stderr, or file.
Output = "stdout"
# Verbosity: DEBUG, INFO, WARNING, or ERROR.
Level = "DEBUG"
# Directory that logs are written to.
Dir = "logs"

# Rotation samples, all currently commented out.
# # rotate by time
# KeepHours = 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256
[HTTP]
# Listener: bind address and port.
Host = "0.0.0.0"
Port = 19000

# HTTPS certificate and private-key file paths (both empty here).
CertFile = ""
KeyFile = ""

# Diagnostics switches.
PrintAccessLog = false  # print access log
PProf = false           # enable pprof
ExposeMetrics = true    # expose prometheus /metrics

# Upper bound on content length: 64M.
MaxContentLength = 67_108_864

# Server timeouts, all in seconds.
ReadTimeout = 20       # read
WriteTimeout = 40      # write
IdleTimeout = 120      # idle
ShutdownTimeout = 30   # graceful shutdown
[HTTP.APIForAgent]
# Agent-facing API group is switched on.
Enable = true
# Basic auth for agents is not configured. Keep the header and its credential
# line commented or uncommented as a pair: a live credential under a
# commented-out header is parsed as a plain key of [HTTP.APIForAgent], not as
# a BasicAuth entry. To require basic auth from agents, uncomment BOTH lines.
# [HTTP.APIForAgent.BasicAuth]
# xx = "xxyy"
[HTTP.APIForService]
# Service-facing API group is switched on.
Enable = true
# Basic-auth entries for this API group; each line appears to be
# `username = "password"` (compare BasicAuthUser/BasicAuthPass under
# [CenterApi]) — TODO confirm.
[HTTP.APIForService.BasicAuth]
xx = "xxyy"
# [Alert] carries no keys of its own, so only its sub-tables are declared.
[Alert.Heartbeat]
IP = ""              # auto detect if blank
Interval = 1000      # ms
EngineName = "edge"

# [Alert.Alerting]
# NotifyConcurrency = 10
[Pushgw]
# Only LabelRewrite is active in this table; everything else below is a
# commented-out sample.
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
# ForceUseServerTS = false
# Sample: debug-sample filter (disabled).
# [Pushgw.DebugSample]
# ident = "xx"
# __name__ = "xx"
# Sample: writer queue sizing (disabled).
# [Pushgw.WriterOpt]
# QueueMaxSize = 1000000
# QueuePopSize = 1000
[[Pushgw.Writers]]
# Write endpoint for this writer entry. The commented-out URL is an
# alternative endpoint kept for reference.
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"
Url = "http://127.0.0.1:8428/api/v1/write"
# Basic auth username
BasicAuthUser = "xx"
# Basic auth password
BasicAuthPass = "xxyy"
# Extra request headers; presumably a flat list of alternating name/value
# entries (here X-From: n9e) — TODO confirm.
Headers = ["X-From", "n9e"]
# timeout settings, unit: ms
Timeout = 10000
DialTimeout = 3000
TLSHandshakeTimeout = 30000
ExpectContinueTimeout = 1000
IdleConnTimeout = 90000
# time duration, unit: ms
KeepAlive = 30000
# Connection limits.
MaxConnsPerHost = 0
MaxIdleConns = 100
MaxIdleConnsPerHost = 100
## Optional TLS Config
# UseTLS = false
# TLSCA = "/etc/n9e/ca.pem"
# TLSCert = "/etc/n9e/cert.pem"
# TLSKey = "/etc/n9e/key.pem"
# InsecureSkipVerify = false
# [[Writers.WriteRelabels]]
# Action = "replace"
# SourceLabels = ["__address__"]
# Regex = "([^:]+)(?::\\d+)?"