diff --git a/README.md b/README.md index 6b571c4..5d619e5 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ make | consul_serf_lan_member_status | Status of member in the cluster. 1=Alive, 2=Leaving, 3=Left, 4=Failed. | member | | consul_serf_wan_member_info | Information of member in the wan cluster. | member, dc, role, version | | consul_serf_wan_member_status | Status of member in the wan cluster. 1=Alive, 2=Leaving, 3=Left, 4=Failed. | member, dc | +| consul_raft_lag | How far behind the leader a member is. | member, member_id | | consul_catalog_services | How many services are in the cluster | | | consul_service_tag | Tags of a service. | service_id, node, tag | | consul_catalog_service_node_healthy | Is this service healthy on this node | service_id, node, service_name | diff --git a/pkg/exporter/consul_exporter.go b/pkg/exporter/consul_exporter.go index 7eb10c6..88bfd0c 100644 --- a/pkg/exporter/consul_exporter.go +++ b/pkg/exporter/consul_exporter.go @@ -50,6 +50,11 @@ var ( "Does Raft cluster have a leader (according to this node).", nil, nil, ) + clusterLag = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "raft_lag"), + "How far behind the leader a node is.", + []string{"member", "member_id"}, nil, + ) nodeCount = prometheus.NewDesc( prometheus.BuildFQName(namespace, "", "serf_lan_members"), "How many members are in the cluster.", @@ -210,6 +215,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { ch <- up ch <- clusterServers ch <- clusterLeader + ch <- clusterLag ch <- nodeCount ch <- memberInfo ch <- memberStatus @@ -232,6 +238,7 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { if !e.agentOnly { ok = e.collectPeersMetric(ch) && ok ok = e.collectLeaderMetric(ch) && ok + ok = e.collectLagMetric(ch) && ok ok = e.collectNodesMetric(ch) && ok ok = e.collectMembersInfoMetric(ch) && ok ok = e.collectMembersMetric(ch) && ok @@ -282,6 +289,30 @@ func (e *Exporter) collectLeaderMetric(ch chan<- prometheus.Metric) bool { return true } +func (e *Exporter) collectLagMetric(ch chan<- prometheus.Metric) bool { + raft_config, err := e.client.Operator().RaftGetConfiguration(&e.queryOptions) + if err != nil { + e.logger.Error("Can't query consul", "err", err) + return false + } + + leaderLastCommitIndex := uint64(0) + + for _, raftServer := range raft_config.Servers { + if raftServer.Leader { + leaderLastCommitIndex = raftServer.LastIndex + } + } + + for _, s := range raft_config.Servers { + lag := leaderLastCommitIndex - s.LastIndex + ch <- prometheus.MustNewConstMetric( + clusterLag, prometheus.GaugeValue, float64(lag), s.Node, s.ID, + ) + } + return true +} + func (e *Exporter) collectNodesMetric(ch chan<- prometheus.Metric) bool { nodes, _, err := e.client.Catalog().Nodes(&e.queryOptions) if err != nil {