Alerts


/etc/prometheus/guix.alerts > guix
BuildCoordinatorHookEventsNotBeingProcessed (0 active)
# Fires when, over a 30-minute window, the per-second rate of
# guixbuildcoordinator_hook_duration_seconds_count is below 0.001 while the
# per-second rate of guixbuildcoordinator_unprocessed_hook_events_total is
# above 0.001, and that has held for 10 minutes.
alert: BuildCoordinatorHookEventsNotBeingProcessed
expr: >-
  rate(guixbuildcoordinator_hook_duration_seconds_count[30m]) < 0.001
  and
  rate(guixbuildcoordinator_unprocessed_hook_events_total[30m]) > 0.001
for: 10m
labels:
  severity: critical
annotations:
  # The `event` label is taken from the matching series of the expression.
  summary: Build coordinator events not being processed
  description: "{{ $labels.event }} events are not being processed"
GuixTargetMissing (0 active)
# Fires when the `up` series of any of the listed scrape jobs has been 0 for
# 10 minutes.
#
# The job names are matched with a regex (`=~`). Dots are escaped so that they
# match a literal "." rather than any character; inside a PromQL double-quoted
# string the backslash itself must be escaped, hence `\\.`.
alert: GuixTargetMissing
expr: >-
  up{job=~"bordeaux\\.guix\\.gnu\\.org-nar-herders|coordinator\\.bayfront\\.guix\\.gnu\\.org|data\\.qa\\.guix\\.gnu\\.org-guix-data-service|data\\.guix\\.gnu\\.org-guix-data-service|monokuma\\.cbaines\\.net/node-exporter|hatysa\\.cbaines\\.net/node-exporter|hamal\\.cbaines\\.net/node-exporter|milano-guix-1/node-exporter"}
  == 0
for: 10m
labels:
  severity: critical
annotations:
  # Literal block scalar: the line breaks and the two-space indent of the
  # VALUE/LABELS lines are part of the rendered description.
  description: |-
    A Prometheus target has disappeared. An exporter might be crashed.
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Prometheus target missing (instance {{ $labels.instance }})
MirrorMissingNars (0 active)
# Fires when narherder_nar_files_total with stored="false" has been above 250
# for 10 minutes on either of the listed instances.
#
# The instances are matched with a regex (`=~`). Dots are escaped so that they
# match a literal "." rather than any character; inside a PromQL double-quoted
# string the backslash itself must be escaped, hence `\\.`.
alert: MirrorMissingNars
expr: >-
  narherder_nar_files_total{instance=~"hydra-guix-129\\.guix\\.gnu\\.org:443|hatysa\\.cbaines\\.net:443",stored="false"}
  > 250
for: 10m
labels:
  severity: critical
annotations:
  description: Mirror missing {{ $value }} nars
  summary: Mirror missing nars (instance {{ $labels.instance }})
TooManyNarsOnBordeaux (0 active)
# Fires when narherder_nar_files_total with stored="true" on the instance
# bordeaux.guix.gnu.org:443 has been above 250 for 10 minutes.
alert: TooManyNarsOnBordeaux
expr: >-
  narherder_nar_files_total{instance="bordeaux.guix.gnu.org:443",stored="true"}
  > 250
for: 10m
labels:
  severity: critical
annotations:
  # $value is the sample value that satisfied the expression.
  summary: Too many nars on bordeaux
  description: 'Bordeaux has {{ $value }} nars'
/etc/prometheus/node.alerts > node
HostOutOfDiskSpace (3 active)
# Fires when a filesystem's available bytes, as a percentage of its size, have
# been below 10 for 5 minutes. The instance capella.cbaines.net:9100 is
# excluded by the selector on node_filesystem_avail_bytes.
alert: HostOutOfDiskSpace
expr: >-
  (node_filesystem_avail_bytes{instance!="capella.cbaines.net:9100"} * 100)
  / node_filesystem_size_bytes < 10
for: 5m
labels:
  severity: warning
annotations:
  # Literal block scalar: the line breaks and the two-space indent of the
  # VALUE/LABELS lines are part of the rendered description. Both lines use
  # " = " as the separator, matching the GuixTargetMissing description.
  description: |-
    Disk is almost full (< 10% left)
      VALUE = {{ $value }}
      LABELS = {{ $labels }}
  summary: Host out of disk space (instance {{ $labels.instance }})
Labels State Active Since Value
alertname="HostOutOfDiskSpace" device="/dev/nvme0n1p1" fstype="ext4" instance="rumbia.cbaines.net:9100" job="rumbia.cbaines.net/node-exporter" mountpoint="/gnu/store" severity="warning" firing 2024-05-02 19:23:12.917888974 +0000 UTC 0.15777439437593543
alertname="HostOutOfDiskSpace" device="/dev/nvme0n1p1" fstype="ext4" instance="rumbia.cbaines.net:9100" job="rumbia.cbaines.net/node-exporter" mountpoint="/gnu" severity="warning" firing 2024-05-02 19:23:12.917888974 +0000 UTC 0.15777439437593543
alertname="HostOutOfDiskSpace" device="/dev/sda1" fstype="btrfs" instance="hatysa.cbaines.net:9100" job="hatysa.cbaines.net/node-exporter" mountpoint="/var/lib/nars" severity="warning" firing 2024-04-24 11:58:12.917888974 +0000 UTC 6.173787462168437