Grafana has started and the monitoring data is visible, but the services port status still shows Grafana as down

Grafana has started and the web page at ip:3000 already shows the monitoring data, but the services port status still shows Grafana's status as down.

Could someone please help take a look?

Please check whether the Grafana process started successfully. Also, could you share your cluster version, and use the logs to narrow down the problem?
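A minimal check sketch on the Grafana host, assuming a tidb-ansible deployment supervised by systemd (the unit name grafana-3000.service, the port, and the log path are assumptions; adjust them to your deployment):

ps -ef | grep grafana-server | grep -v grep                        # is the grafana-server process running?
ss -tlnp | grep ':3000'                                            # is anything actually listening on port 3000?
systemctl status grafana-3000.service                              # unit name is an assumption
tail -n 100 /data/grafana_servers/deploy/log/grafana.log           # log path is an assumption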

Our version is:
Server version: 5.7.25-TiDB-v3.0.1 MySQL Community Server (Apache License 2.0)

The running processes:

I found that my grafana_collector process has not started.

The Grafana log doesn't seem to show any errors either.

Has the status never updated? You mentioned above that the grafana_collector process did not start; can you check the logs to see what the problem is?

t=2019-09-19T21:49:49+0800 lvl=eror msg="failed to look up user based on cookie" logger=context error="user token not found"
t=2019-09-19T21:49:52+0800 lvl=eror msg="Dashboard not found" logger=context userId=1 orgId=1 uname=admin error="Dashboard not found"
t=2019-09-19T21:50:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:50:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:51:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:51:10+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:52:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:52:19+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:53:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:53:05+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:53:23+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=10 name="TiKV raft store CPU alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A23.196918121%2B08%3A00&query=sum%28rate%28tikv_thread_cpu_seconds_total%7Binstance%3D~%22%24instance%22%2C+name%3D~%22raftstore_.%2A%22%7D%5B1m%5D%29%29+by+%28instance%29&start=2019-09-19T21%3A52%3A23.196918121%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:24+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=7 name="server report failures alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A24.196592684%2B08%3A00&query=sum%28rate%28tikv_server_report_failure_msg_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28type%2Cinstance%2Cstore_id%29&start=2019-09-19T21%3A53%3A14.196592684%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:24+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=9 name="approximate region size alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A24.196582431%2B08%3A00&query=histogram_quantile%280.99%2C+sum%28rate%28tikv_raftstore_region_size_bucket%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28le%29%29&start=2019-09-19T21%3A52%3A24.196582431%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:25+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=8 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A25.196570793%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A53%3A15.196570793%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:28+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=11 name="scheduler pending commands alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A28.196628945%2B08%3A00&query=sum%28tikv_scheduler_contex_total%7Binstance%3D~%22%24instance%22%7D%29+by+%28instance%29&start=2019-09-19T21%3A48%3A28.196628945%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:31+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=2 name="server report failures alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A31.196574726%2B08%3A00&query=sum%28rate%28tikv_server_report_failure_msg_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28type%2Cinstance%2Cstore_id%29&start=2019-09-19T21%3A53%3A21.196574726%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:31+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=3 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A31.196581594%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A53%3A21.196581594%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:32+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=7 name="server report failures alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A32.196416388%2B08%3A00&query=sum%28rate%28tikv_server_report_failure_msg_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28type%2Cinstance%2Cstore_id%29&start=2019-09-19T21%3A53%3A22.196416388%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:53:35+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=8 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A53%3A35.196605261%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A53%3A25.196605261%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:55:14+0800 lvl=eror msg="Data source with same name already exists" logger=context userId=1 orgId=1 uname=admin error="Data source with same name already exists"
t=2019-09-19T21:56:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:56:09+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:56:55+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=8 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A56%3A55.487311686%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A56%3A45.487311686%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:01+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=3 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A01.487519215%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A56%3A51.487519215%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:01+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=2 name="server report failures alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A01.487817774%2B08%3A00&query=sum%28rate%28tikv_server_report_failure_msg_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28type%2Cinstance%2Cstore_id%29&start=2019-09-19T21%3A56%3A51.487817774%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:01+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=1 name="Critical error alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A01.488056898%2B08%3A00&query=sum%28rate%28tikv_critical_error_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A52%3A01.488056898%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:04+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=7 name="server report failures alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A04.487594411%2B08%3A00&query=sum%28rate%28tikv_server_report_failure_msg_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28type%2Cinstance%2Cstore_id%29&start=2019-09-19T21%3A56%3A54.487594411%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:05+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=4 name="approximate region size alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A05.4875177%2B08%3A00&query=histogram_quantile%280.99%2C+sum%28rate%28tikv_raftstore_region_size_bucket%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28le%29%29&start=2019-09-19T21%3A56%3A05.4875177%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:05+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=8 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A05.487525132%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A56%3A55.487525132%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:11+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=2 name="server report failures alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A11.487478078%2B08%3A00&query=sum%28rate%28tikv_server_report_failure_msg_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28type%2Cinstance%2Cstore_id%29&start=2019-09-19T21%3A57%3A01.487478078%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:11+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=3 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A11.487518627%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A57%3A01.487518627%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:12+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=7 name="server report failures alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A12.48931918%2B08%3A00&query=sum%28rate%28tikv_server_report_failure_msg_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28type%2Cinstance%2Cstore_id%29&start=2019-09-19T21%3A57%3A02.48931918%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:57:15+0800 lvl=eror msg="Alert Rule Result Error" logger=alerting.evalContext ruleId=8 name="TiKV channel full alert" error="tsdb.HandleRequest() error Get http://10.0.66.228:9090/api/v1/query_range?end=2019-09-19T21%3A57%3A15.487436859%2B08%3A00&query=sum%28rate%28tikv_channel_full_total%7Binstance%3D~%22%24instance%22%7D%5B1m%5D%29%29+by+%28instance%2C+type%29&start=2019-09-19T21%3A57%3A05.487436859%2B08%3A00&step=30.000: dial tcp 10.0.66.228:9090: connect: connection refused" changing state to=alerting
t=2019-09-19T21:58:36+0800 lvl=eror msg="Data source with same name already exists" logger=context userId=1 orgId=1 uname=admin error="Data source with same name already exists"
t=2019-09-19T21:59:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T21:59:13+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:00:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:00:22+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:01:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:01:08+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:02:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:02:17+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:03:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:03:03+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:04:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:04:12+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:05:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:05:21+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:06:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:06:07+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:07:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:07:16+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:08:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:08:02+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:09:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:09:11+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:10:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:10:20+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:11:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:11:06+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:12:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:12:15+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:13:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:13:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:14:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:14:10+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:15:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:15:19+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:16:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:16:05+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:17:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:17:14+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:18:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:18:23+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:19:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:19:09+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:20:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:20:18+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:21:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:21:04+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:22:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:22:13+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:23:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:23:22+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:24:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:24:08+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:25:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:25:17+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:26:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:26:03+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:27:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:27:12+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:28:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:28:21+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:29:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:29:07+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:30:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:30:16+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:31:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:31:02+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:32:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:32:11+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:33:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:33:20+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:34:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:34:06+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:35:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:35:15+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:36:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:36:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:37:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:37:10+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:38:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:38:19+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:39:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:39:05+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:40:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:40:14+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:41:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:41:23+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:42:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:42:09+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:43:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:43:18+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=6 name="Critical error alert" changing state to=no_data
t=2019-09-19T22:44:01+0800 lvl=info msg="Alert Rule returned no data" logger=alerting.evalContext ruleId=1 name="Critical error alert" changing state to=no_data
 

These are the errors in the log; could someone please take a look?

The log shows that there is a data source with a duplicate name in Grafana. Check the data source section of the Grafana panel to see whether multiple data sources are causing the display problem.
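If you want to double-check from the command line, the Grafana HTTP API can list every registered data source; a sketch assuming the admin account and the Grafana address used in this deployment (substitute the real password):

curl -s -u admin:<password> http://10.1.66.228:3000/api/datasources    # look for duplicate names in the output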

I found that after the deployment finished, there is no grafana_collector command in Grafana's bin directory. Here is my deployment inventory; could you check whether there is anything wrong with it?

## TiDB Cluster Part
[tidb_servers]
10.11.10.145
10.11.10.146

[tikv_servers]
10.11.10.142
10.11.10.143
10.11.10.144

[pd_servers]
10.11.10.145
10.11.10.146
10.11.10.147

[spark_master]

[spark_slaves]

[lightning_server]

[importer_server]

## Monitoring Part
# prometheus and pushgateway servers
[monitoring_servers]
monitoring_servers ansible_host=10.1.66.228 deploy_dir=/data/monitoring_servers/deploy

[grafana_servers]
grafana_servers ansible_host=10.1.66.228 deploy_dir=/data/grafana_servers/deploy

# node_exporter and blackbox_exporter servers
[monitored_servers]
10.11.10.142
10.11.10.143
10.11.10.144
10.11.10.145
10.11.10.146
10.11.10.147
10.1.66.228

[alertmanager_servers]
alertmanager_servers ansible_host=10.1.66.228 deploy_dir=/data/alertmanager_servers/deploy

[kafka_exporter_servers]

## Binlog Part
[pump_servers]

[drainer_servers]

## Group variables
[pd_servers:vars]
# location_labels = ["zone","rack","host"]

## Global variables
[all:vars]
deploy_dir = /data/deploy

## Connection
# ssh via normal user
ansible_user = tidb

cluster_name = tidb-cluster

tidb_version = v3.0.1

# process supervision, [systemd, supervise]
process_supervision = systemd

timezone = Asia/Shanghai

enable_firewalld = False
# check NTP service
enable_ntpd = True
set_hostname = False

## binlog trigger
enable_binlog = False

# kafka cluster address for monitoring, example:
# kafka_addrs = "192.168.0.11:9092,192.168.0.12:9092,192.168.0.13:9092"
kafka_addrs = ""

# zookeeper address of kafka cluster for monitoring, example:
# zookeeper_addrs = "192.168.0.11:2181,192.168.0.12:2181,192.168.0.13:2181"
zookeeper_addrs = ""

# enable TLS authentication in the TiDB cluster
enable_tls = False

# KV mode
deploy_without_tidb = False

# wait for region replication complete before start tidb-server.
wait_replication = True

# Optional: Set if you already have a alertmanager server.
# Format: alertmanager_host:alertmanager_port
alertmanager_target = ""

grafana_admin_user = "admin"
grafana_admin_password = "****"


### Collect diagnosis
collect_log_recent_hours = 2

enable_bandwidth_limit = True
# default: 10Mb/s, unit: Kbit/s
collect_bandwidth_limit = 10000

How do I check the data source?

There is only one entry under data source.

There is a data source page in Grafana; check whether there are multiple data sources there. As for the missing grafana_collector, the deploy may have had a problem; the inventory file itself looks fine.
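A small sketch for verifying on the Grafana host whether grafana_collector was deployed at all; the path comes from the deploy_dir in the inventory above, and the bin/ layout is an assumption:

ls /data/grafana_servers/deploy/bin/ | grep -i collector    # is there a grafana_collector binary? (bin/ layout is an assumption)
systemctl list-units --all | grep -i grafana                # which grafana-related units did the deploy install?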

The deploy showed everything as ok, with no errors.

Can I redeploy Grafana and Prometheus separately?

Update Grafana on its own: ansible-playbook rolling_update.yml --tags=grafana
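For reference, a sketch of how the updates would typically be run from the tidb-ansible directory; the prometheus tag is an assumption, so check the tags your rolling_update.yml actually defines:

cd /path/to/tidb-ansible                                  # directory is an assumption
ansible-playbook rolling_update.yml --tags=grafana        # update Grafana only
ansible-playbook rolling_update.yml --tags=prometheus     # assumed tag for updating Prometheus only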

Will this have any impact?

After updating it separately, it's still the same; the situation is the same as before.

I found that grafana_collector has already been removed in v3.0.1.

Where can I download the latest Grafana JSON files?

[monitoring_servers] monitoring_servers ansible_host=10.1.66.228 deploy_dir=/data/monitoring_servers/deploy

[grafana_servers] grafana_servers ansible_host=10.1.66.228 deploy_dir=/data/grafana_servers/deploy

Try not making the alias here the same as the [] group name.
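For example, a sketch of the same two entries with the host aliases renamed so they no longer collide with the group names (the alias names are just illustrative):

[monitoring_servers]
monitor-228 ansible_host=10.1.66.228 deploy_dir=/data/monitoring_servers/deploy

[grafana_servers]
grafana-228 ansible_host=10.1.66.228 deploy_dir=/data/grafana_servers/deploy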

I changed it afterwards, but the problem remained. The core of the issue is that grafana_collector has been removed starting from v3.0.1.