Ruler

简要概述

Ruler 用于按配置的周期评估告警规则（以及记录规则），并将产生的告警推送至 Alertmanager。

配置示例

# Ruler settings. The keys correspond to the yaml tags of the ruler Config
# struct shown later in this document.
ruler:
  # How frequently to evaluate rules.
  evaluation_interval: 60s
  # How frequently to poll for updated rules.
  poll_interval: 60s
  # Local path where rule files are stored.
  rule_path: /data/cortex/rule
  # URL of the Alertmanager to send notifications to. When targeting a Cortex
  # Alertmanager, it must include any path set in the Alertmanager external URL.
  alertmanager_url: "http://192.168.31.201:9009/alertmanager"
  # Use the Alertmanager V2 API when sending notifications.
  enable_alertmanager_v2: true
  # Capacity of the queue for notifications to be sent to the Alertmanager.
  notification_queue_capacity: 10000
  # HTTP timeout when sending notifications to the Alertmanager.
  notification_timeout: 10s
  # Enable sharding of rule groups.
  enable_sharding: true
  # Ring settings (see RingConfig).
  ring:
    # KV store settings for the ring; this example uses etcd.
    kvstore:
      store: "etcd"
      prefix: "/cortex/rulers/"
      etcd:
        # Three etcd endpoints are listed in this example.
        endpoints: ["192.168.31.201:2379","192.168.31.202:2379","192.168.31.203:2379"]
        dial_timeout: 10s
        max_retries: 10
        # TLS is enabled; certificate verification is not skipped
        # (tls_insecure_skip_verify is false).
        tls_enabled: true
        tls_cert_path: "/opt/cortex/pki/server.crt"
        tls_key_path: "/opt/cortex/pki/server.key"
        tls_ca_path: "/opt/cortex/pki/ca.crt"
        tls_insecure_skip_verify: false
    heartbeat_period: 5s
    heartbeat_timeout: 1m0s
    # Instance details for this ruler.
    # NOTE(review): presumably these must be unique per node — confirm.
    instance_id: node1
    instance_port: 9005
    instance_addr: "192.168.31.201"
  # NOTE(review): presumably exposes the ruler HTTP API — confirm against the
  # upstream Cortex documentation.
  enable_api: true
  # NOTE(review): presumably enables reporting of query statistics for rule
  # evaluations — confirm.
  query_stats_enabled: true

# Storage settings for the ruler; this example selects the s3 backend.
ruler_storage:
  backend: s3
  s3:
    # Host:port of the S3-compatible endpoint.
    endpoint: "192.168.31.200:9000"
    region: "default"
    bucket_name: "uptime"
    # NOTE(review): example credentials are hard-coded here; replace them with
    # real secrets and keep those out of version control.
    secret_access_key: "minioadmin"
    access_key_id: "minioadmin"
    # NOTE(review): presumably false means the endpoint is reached over HTTPS —
    # confirm that the endpoint above actually serves TLS.
    insecure: false

数据结构

Config

// Config is the configuration for the recording rules server.
type Config struct {
    // This is used for template expansion in alerts; must be a valid URL.
    ExternalURL flagext.URLValue `yaml:"external_url"`
    // Labels to add to all alerts.
    ExternalLabels labels.Labels `yaml:"external_labels,omitempty" doc:"nocli|description=Labels to add to all alerts."`
    // GRPC Client configuration.
    ClientTLSConfig grpcclient.Config `yaml:"ruler_client"`
    // How frequently to evaluate rules by default.
    // Interval at which alert rule expressions are evaluated; the default is 1 minute.
    EvaluationInterval time.Duration `yaml:"evaluation_interval"`
    // How frequently to poll for updated rules.
    // Interval at which alert rules are synchronized; the default is 1 minute.
    PollInterval time.Duration `yaml:"poll_interval"`
    // Path to store rule files for prom manager.
    // Local path where the alert rule files are stored.
    RulePath string `yaml:"rule_path"`

    // URL of the Alertmanager to send notifications to.
    // If you are configuring the ruler to send to a Cortex Alertmanager,
    // ensure this includes any path set in the Alertmanager external URL.
    // Alerts are pushed to this Alertmanager instance.
    AlertmanagerURL string `yaml:"alertmanager_url"`
    // Whether to use DNS SRV records to discover Alertmanager.
    AlertmanagerDiscovery bool `yaml:"enable_alertmanager_discovery"`
    // How long to wait between refreshing the list of Alertmanager based on DNS service discovery.
    AlertmanagerRefreshInterval time.Duration `yaml:"alertmanager_refresh_interval"`
    // Enables the ruler notifier to use the Alertmanager V2 API.
    // Alertmanager is called using API version 2.
    // (The identifier below keeps its existing spelling.)
    AlertmanangerEnableV2API bool `yaml:"enable_alertmanager_v2"`
    // Capacity of the queue for notifications to be sent to the Alertmanager.
    NotificationQueueCapacity int `yaml:"notification_queue_capacity"`
    // HTTP timeout duration when sending notifications to the Alertmanager.
    NotificationTimeout time.Duration `yaml:"notification_timeout"`
    // Client configs for interacting with the Alertmanager
    Notifier NotifierConfig `yaml:"alertmanager_client"`

    // Max time to tolerate outage for restoring "for" state of alert.
    OutageTolerance time.Duration `yaml:"for_outage_tolerance"`
    // Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period.
    ForGracePeriod time.Duration `yaml:"for_grace_period"`
    // Minimum amount of time to wait before resending an alert to Alertmanager.
    ResendDelay time.Duration `yaml:"resend_delay"`

    // Enable sharding rule groups.
    // Ring holds the ring settings (see RingConfig) under the "ring" YAML key.
    // NOTE(review): ShardingStrategy, SearchPendingFor and FlushCheckPeriod
    // carry no description in this excerpt; consult the upstream Cortex
    // documentation for their semantics.
    EnableSharding   bool          `yaml:"enable_sharding"`
    ShardingStrategy string        `yaml:"sharding_strategy"`
    SearchPendingFor time.Duration `yaml:"search_pending_for"`
    Ring             RingConfig    `yaml:"ring"`
    FlushCheckPeriod time.Duration `yaml:"flush_period"`

    // NOTE(review): presumably toggles the ruler HTTP API — confirm.
    EnableAPI bool `yaml:"enable_api"`

    // Comma-separated tenant lists (flagext.StringSliceCSV).
    // NOTE(review): presumably an allow-list and a deny-list of tenants handled
    // by this ruler — confirm.
    EnabledTenants  flagext.StringSliceCSV `yaml:"enabled_tenants"`
    DisabledTenants flagext.StringSliceCSV `yaml:"disabled_tenants"`

    // Excluded from YAML (tag "-"), so it cannot be set from the config file.
    RingCheckPeriod time.Duration `yaml:"-"`

    // NOTE(review): semantics of the two flags below are not shown in this
    // excerpt; the names suggest query statistics reporting and suppression
    // of a rule-group label — confirm.
    EnableQueryStats      bool `yaml:"query_stats_enabled"`
    DisableRuleGroupLabel bool `yaml:"disable_rule_group_label"`
}

rulestore.Config

github.com/cortexproject/cortex/pkg/ruler/rulestore/config.go

// Config configures a rule store.
type Config struct {
    // Embedded bucket configuration; the ",inline" tag places its keys at the
    // same YAML level as this struct rather than under a nested key.
    bucket.Config `yaml:",inline"`
    // Settings for the configs-service client, under the "configdb" YAML key.
    ConfigDB      client.Config `yaml:"configdb"`
    // Settings for the local rule store, under the "local" YAML key.
    Local         local.Config  `yaml:"local"`
}

bucket.Config

数据存储

client.Config

github.com/cortexproject/cortex/pkg/configs/client/client.go

// Config says where we can find the ruler userconfig.
type Config struct {
    // NOTE(review): presumably the base URL of the configs API — confirm.
    ConfigsAPIURL flagext.URLValue     `yaml:"configs_api_url"`
    ClientTimeout time.Duration        `yaml:"client_timeout"` // HTTP timeout duration for requests made to the Weave Cloud configs service.
    // TLS client settings; the ",inline" tag places their keys at the same
    // YAML level as this struct.
    TLS           tls_cfg.ClientConfig `yaml:",inline"`
}

local.Config

github.com/cortexproject/cortex/pkg/ruler/rulestore/local/local.go

// Config holds the settings of the local rule store.
type Config struct {
    // Directory is set via the "directory" YAML key.
    // NOTE(review): presumably the directory rule files are read from — confirm.
    Directory string `yaml:"directory"`
}

RingConfig

github.com/cortexproject/cortex/pkg/ruler/ruler_ring.go

// RingConfig masks the ring lifecycler config which contains
// many options not really required by the rulers ring. This config
// is used to strip down the config to the minimum, and avoid confusion
// to the user.
type RingConfig struct {
    // KV store settings, under the "kvstore" YAML key.
    KVStore          kv.Config     `yaml:"kvstore"`
    // NOTE(review): presumably how often this instance heartbeats to the ring
    // and how long before a silent instance is considered unhealthy — confirm.
    HeartbeatPeriod  time.Duration `yaml:"heartbeat_period"`
    HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`

    // Instance details
    // Fields tagged doc:"hidden" are still settable from YAML via their keys.
    InstanceID             string   `yaml:"instance_id" doc:"hidden"`
    InstanceInterfaceNames []string `yaml:"instance_interface_names"`
    InstancePort           int      `yaml:"instance_port" doc:"hidden"`
    InstanceAddr           string   `yaml:"instance_addr" doc:"hidden"`
    NumTokens              int      `yaml:"num_tokens"`

    // Injected internally
    // Excluded from YAML (tag "-").
    ListenPort int `yaml:"-"`

    // Used for testing
    // Excluded from YAML (tag "-").
    SkipUnregister bool `yaml:"-"`
}



最后修改 2023.07.06: refactor: update some (5fe4b38)