Nagios

The deployment of OpenStack Nagios can be tailored through a number of chart values, described below.
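
Overrides are supplied as standard Helm values: collect the keys you want to change in a YAML file and pass it at install or upgrade time. A minimal sketch (the release name, chart path, and namespace in the comment are illustrative assumptions, not part of this chart's documentation):

    # nagios-overrides.yaml -- set only the keys you want to change;
    # anything omitted keeps the chart default documented below.
    endpoints:
      nagios:
        auth:
          admin:
            username: nagiosadmin
            password: password
    pod:
      replicas:
        nagios: 1

    # Applied with something like:
    #   helm upgrade --install nagios ./nagios \
    #     --namespace osh-infra --values nagios-overrides.yaml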

General Parameters

  • conf.httpd

    • Type: string

    • Description:

    • ServerRoot "/usr/local/apache2"
      
      Listen 80
      
      LoadModule mpm_event_module modules/mod_mpm_event.so
      LoadModule authn_file_module modules/mod_authn_file.so
      LoadModule authn_core_module modules/mod_authn_core.so
      LoadModule authz_host_module modules/mod_authz_host.so
      LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
      LoadModule authz_user_module modules/mod_authz_user.so
      LoadModule authz_core_module modules/mod_authz_core.so
      LoadModule access_compat_module modules/mod_access_compat.so
      LoadModule auth_basic_module modules/mod_auth_basic.so
      LoadModule ldap_module modules/mod_ldap.so
      LoadModule authnz_ldap_module modules/mod_authnz_ldap.so
      LoadModule reqtimeout_module modules/mod_reqtimeout.so
      LoadModule filter_module modules/mod_filter.so
      LoadModule proxy_html_module modules/mod_proxy_html.so
      LoadModule log_config_module modules/mod_log_config.so
      LoadModule env_module modules/mod_env.so
      LoadModule headers_module modules/mod_headers.so
      LoadModule setenvif_module modules/mod_setenvif.so
      LoadModule version_module modules/mod_version.so
      LoadModule proxy_module modules/mod_proxy.so
      LoadModule proxy_connect_module modules/mod_proxy_connect.so
      LoadModule proxy_http_module modules/mod_proxy_http.so
      LoadModule proxy_balancer_module modules/mod_proxy_balancer.so
      LoadModule slotmem_shm_module modules/mod_slotmem_shm.so
      LoadModule slotmem_plain_module modules/mod_slotmem_plain.so
      LoadModule unixd_module modules/mod_unixd.so
      LoadModule status_module modules/mod_status.so
      LoadModule autoindex_module modules/mod_autoindex.so
      
      <IfModule unixd_module>
      User daemon
      Group daemon
      </IfModule>
      
      <Directory />
          AllowOverride none
          Require all denied
      </Directory>
      
      <Files ".ht*">
          Require all denied
      </Files>
      
      ErrorLog /dev/stderr
      
      LogLevel warn
      
      <IfModule log_config_module>
          LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
          LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" proxy
          LogFormat "%h %l %u %t \"%r\" %>s %b" common

          <IfModule logio_module>
            LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
          </IfModule>

          SetEnvIf X-Forwarded-For "^.*\..*\..*\..*" forwarded
          CustomLog /dev/stdout common
          CustomLog /dev/stdout combined
          CustomLog /dev/stdout proxy env=forwarded
      </IfModule>
      
      <Directory "/usr/local/apache2/cgi-bin">
          AllowOverride None
          Options None
          Require all granted
      </Directory>
      
      <IfModule headers_module>
          RequestHeader unset Proxy early
      </IfModule>
      
      <IfModule proxy_html_module>
      Include conf/extra/proxy-html.conf
      </IfModule>
      
      <VirtualHost *:80>
        <Location />
            ProxyPass http://localhost:{{ tuple "nagios" "internal" "nagios" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
            ProxyPassReverse http://localhost:{{ tuple "nagios" "internal" "nagios" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
        </Location>
        <Proxy *>
            AuthName "Nagios"
            AuthType Basic
            AuthBasicProvider file ldap
            AuthUserFile /usr/local/apache2/conf/.htpasswd
            AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
            AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
            AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
            Require valid-user
        </Proxy>
      </VirtualHost>
      
  • conf.nagios.additionalPlugins

    • Type: list

    • Description:

    • []
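
The default is an empty list, and the chart's item schema is not documented here, so the shape below is a hypothetical sketch only. Note that manifests.configmap_additional_plugins (documented further down, default false) must also be enabled for extra plugins to be rendered:

    conf:
      nagios:
        additionalPlugins:
          # hypothetical entry shape -- the field names are assumptions
          - name: check_my_service.sh
            content: |
              #!/bin/bash
              echo "OK- my service is healthy"
              exit 0
    manifests:
      configmap_additional_plugins: true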

  • conf.nagios.cgi.template

    • Type: string

    • Description:

    • action_url_target=_blank
      authorized_for_all_host_commands=*
      authorized_for_all_hosts=*
      authorized_for_all_service_commands=*
      authorized_for_all_services=*
      authorized_for_configuration_information=*
      authorized_for_system_commands=nagiosadmin
      authorized_for_system_information=*
      default_statuswrl_layout=4
      enable_page_tour=0
      escape_html_tags=1
      lock_author_names=1
      main_config_file=/opt/nagios/etc/nagios.cfg
      navbar_search_for_addresses=1
      navbar_search_for_aliases=1
      notes_url_target=_blank
      physical_html_path=/opt/nagios/share
      ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$
      refresh_rate=90
      result_limit=100
      show_context_help=0
      url_html_path=/nagios
      use_authentication=0
      use_pending_states=1
      use_ssl_authentication=0
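
Because this value is a single opaque string, overriding any one directive means re-supplying the whole template. A sketch that turns CGI authentication on while keeping the remaining defaults (abbreviated; the directives elided here must be carried over from the default template above):

    conf:
      nagios:
        cgi:
          template: |
            main_config_file=/opt/nagios/etc/nagios.cfg
            physical_html_path=/opt/nagios/share
            url_html_path=/nagios
            use_authentication=1
            # ...plus the remaining directives from the default template;
            # a string override replaces it wholesale.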
      
  • conf.nagios.nagios.template

    • Type: string

    • Description:

    • accept_passive_host_checks=1
      accept_passive_service_checks=1
      additional_freshness_latency=15
      allow_empty_hostgroup_assignment=1
      auto_reschedule_checks=0
      auto_rescheduling_interval=30
      auto_rescheduling_window=180
      bare_update_check=0
      cached_host_check_horizon=15
      cached_service_check_horizon=15
      {{- $objectKeys := keys .Values.conf.nagios.objects -}}
      {{- range $object := $objectKeys }}
      cfg_file=/opt/nagios/etc/{{$object}}.cfg
      {{- end }}
      cfg_file=/opt/nagios/etc/objects/commands.cfg
      cfg_file=/opt/nagios/etc/objects/contacts.cfg
      cfg_file=/opt/nagios/etc/objects/timeperiods.cfg
      cfg_file=/opt/nagios/etc/objects/templates.cfg
      cfg_file=/opt/nagios/etc/conf.d/nagios-hosts.cfg
      
      check_external_commands=1
      check_for_orphaned_hosts=1
      check_for_orphaned_services=1
      check_for_updates=1
      check_host_freshness=0
      check_result_path=/opt/nagios/var/spool/checkresults
      check_result_reaper_frequency=10
      check_service_freshness=1
      check_workers=4
      command_file=/opt/nagios/var/rw/nagios.cmd
      daemon_dumps_core=0
      date_format=us
      debug_file=/opt/nagios/var/nagios.debug
      debug_level=0
      debug_verbosity=1
      enable_environment_macros=0
      enable_event_handlers=1
      enable_flap_detection=1
      enable_notifications=1
      enable_predictive_host_dependency_checks=1
      enable_predictive_service_dependency_checks=1
      event_broker_options=-1
      event_handler_timeout=60
      execute_host_checks=1
      execute_service_checks=1
      high_host_flap_threshold=20
      high_service_flap_threshold=20
      host_check_timeout=60
      host_freshness_check_interval=60
      host_inter_check_delay_method=s
      illegal_macro_output_chars=`~$&|'<>"
      interval_length=1
      lock_file=/var/run/nagios.lock
      log_archive_path=/opt/nagios/var/log/archives
      log_current_states=1
      log_event_handlers=1
      log_external_commands=1
      log_file=/opt/nagios/var/log/nagios.log
      log_host_retries=1
      log_initial_states=0
      log_notifications=0
      log_passive_checks=1
      log_rotation_method=d
      log_service_retries=1
      low_host_flap_threshold=5
      low_service_flap_threshold=5
      max_check_result_file_age=3600
      max_check_result_reaper_time=30
      max_concurrent_checks=10
      max_debug_file_size=1000000
      max_host_check_spread=30
      max_service_check_spread=30
      nagios_group=nagios
      nagios_user=nagios
      notification_timeout=60
      object_cache_file=/opt/nagios/var/objects.cache
      obsess_over_hosts=0
      obsess_over_services=0
      ocsp_timeout=5
      passive_host_checks_are_soft=0
      perfdata_timeout=5
      precached_object_file=/opt/nagios/var/objects.precache
      process_performance_data=0
      resource_file=/opt/nagios/etc/resource.cfg
      retain_state_information=1
      retained_contact_host_attribute_mask=0
      retained_contact_service_attribute_mask=0
      retained_host_attribute_mask=0
      retained_process_host_attribute_mask=0
      retained_process_service_attribute_mask=0
      retained_service_attribute_mask=0
      retention_update_interval=60
      service_check_timeout=60
      service_freshness_check_interval=60
      service_inter_check_delay_method=s
      service_interleave_factor=s
      soft_state_dependencies=0
      state_retention_file=/opt/nagios/var/retention.dat
      status_file=/opt/nagios/var/status.dat
      status_update_interval=10
      temp_file=/opt/nagios/var/nagios.tmp
      temp_path=/tmp
      translate_passive_host_checks=0
      use_aggressive_host_checking=0
      use_large_installation_tweaks=0
      use_regexp_matching=1
      use_retained_program_state=1
      use_retained_scheduling_info=1
      use_syslog=0
      use_true_regexp_matching=0
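
The range over the keys of .Values.conf.nagios.objects near the top of this template means that any new key placed under conf.nagios.objects is automatically rendered as a cfg_file=/opt/nagios/etc/<key>.cfg entry. A sketch of adding a custom object file this way (the alert and service names are illustrative assumptions):

    conf:
      nagios:
        objects:
          custom:    # rendered as cfg_file=/opt/nagios/etc/custom.cfg
            template: |
              define service {
                check_command check_prom_alert!my_alert!CRITICAL- my alert fired!OK- all clear
                hostgroup_name prometheus-hosts
                service_description Custom_my-alert
                use notifying_service
              }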
      
  • conf.nagios.notification.http.primary_target

    • Type: string

    • Description:

    • “127.0.0.1:3904/events”

  • conf.nagios.notification.http.secondary_target

    • Type: string

    • Description:

    • “127.0.0.1:3904/events”

  • conf.nagios.notification.snmp.primary_target

    • Type: string

    • Description:

    • “127.0.0.1:15162”

  • conf.nagios.notification.snmp.secondary_target

    • Type: string

    • Description:

    • “127.0.0.1:15162”
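
All four notification targets default to loopback placeholders. Pointing them at real receivers is a plain override; the addresses below are illustrative (RFC 5737 documentation range):

    conf:
      nagios:
        notification:
          http:
            primary_target: 192.0.2.10:3904/events
            secondary_target: 192.0.2.11:3904/events
          snmp:
            primary_target: 192.0.2.10:15162
            secondary_target: 192.0.2.11:15162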

  • conf.nagios.objects.base.template

    • Type: string

    • Description:

    • define host {
        address 127.0.0.1
        alias Prometheus Monitoring
        check_command check-prometheus-host-alive
        host_name {{ tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
        hostgroups prometheus-hosts
        use linux-server
      }
      
      define contact {
        alias notifying contact
        contact_name notifying_contact
        host_notification_options d,u,r,f,s
        host_notification_period 24x7
        name notifying_contact
        register 0
        service_notification_options w,u,c,r,f,s
        service_notification_period 24x7
      }
      
      define contact {
        alias snmp contact
        contact_name snmp_notifying_contact
        host_notification_commands send_host_snmp_trap
        name snmp_notifying_contact
        service_notification_commands send_service_snmp_trap
        use notifying_contact
      }
      
      define contact {
        alias HTTP contact
        contact_name http_notifying_contact
        host_notification_commands send_host_http_post
        name http_notifying_contact
        service_notification_commands send_service_http_post
        use notifying_contact
      }
      
      define contactgroup {
        alias SNMP and HTTP notifying group
        contactgroup_name snmp_and_http_notifying_contact_group
        members snmp_notifying_contact,http_notifying_contact
      }
      
      define hostgroup {
        alias Prometheus Virtual Host
        hostgroup_name prometheus-hosts
      }
      
      define hostgroup {
        alias all
        hostgroup_name all
      }
      
      define hostgroup {
        alias base-os
        hostgroup_name base-os
      }
      
      define command {
        command_line $USER1$/send_service_trap.sh '$USER8$' '$HOSTNAME$' '$SERVICEDESC$' $SERVICESTATEID$ '$SERVICEOUTPUT$' '$USER4$' '$USER5$'
        command_name send_service_snmp_trap
      }
      
      define command {
        command_line $USER1$/send_host_trap.sh '$USER8$' '$HOSTNAME$' $HOSTSTATEID$ '$HOSTOUTPUT$' '$USER4$' '$USER5$'
        command_name send_host_snmp_trap
      }
      
      define command {
        command_line $USER1$/send_http_post_event.py --type service --hostname '$HOSTNAME$' --servicedesc '$SERVICEDESC$' --state_id $SERVICESTATEID$ --output '$SERVICEOUTPUT$' --monitoring_hostname '$HOSTNAME$' --primary_url '$USER6$' --secondary_url '$USER7$'
        command_name send_service_http_post
      }
      
      define command {
        command_line $USER1$/send_http_post_event.py --type host --hostname '$HOSTNAME$' --state_id $HOSTSTATEID$ --output '$HOSTOUTPUT$' --monitoring_hostname '$HOSTNAME$' --primary_url '$USER6$' --secondary_url '$USER7$'
        command_name send_host_http_post
      }
      
      define command {
        command_line $USER1$/check_rest_get_api.py --url $USER2$ --warning_response_seconds 5 --critical_response_seconds 10
        command_name check-prometheus-host-alive
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname '$ARG1$' --labels_csv '$ARG2$' --msg_format '$ARG3$' --ok_message '$ARG4$'
        command_name check_prom_alert_with_labels
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname '$ARG1$' --msg_format '$ARG2$' --ok_message '$ARG3$'
        command_name check_prom_alert
      }
      
      define service {
        check_interval 60
        contact_groups snmp_and_http_notifying_contact_group
        flap_detection_enabled 0
        name notifying_service
        notification_interval 120
        process_perf_data 0
        register 0
        retry_interval 30
        use generic-service
      }
      
  • conf.nagios.objects.ceph.template

    • Type: string

    • Description:

    • define service {
        check_command check_prom_alert!prom_exporter_ceph_unavailable!CRITICAL- CEPH exporter is not collecting metrics for alerting!OK- CEPH exporter metrics are available.
        hostgroup_name prometheus-hosts
        service_description Prometheus-exporter_CEPH
        use generic-service
      }
      
      define command {
        command_line $USER1$/check_exporter_health_metric.py --exporter_api $USER10$ --health_metric ceph_health_status --critical 2 --warning 1
        command_name check_ceph_health
      }
      
      define service {
        check_command check_ceph_health
        check_interval 300
        hostgroup_name base-os
        service_description CEPH_health
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!ceph_monitor_quorum_low!CRITICAL- ceph monitor quorum does not exist!OK- ceph monitor quorum exists
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description CEPH_quorum
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!ceph_monitor_quorum_absent!CRITICAL- ceph monitor quorum does not exist!OK- ceph monitor quorum exists
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description CEPH_quorum
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!ceph_cluster_usage_high!CRITICAL- ceph cluster storage is more than 80 percent!OK- ceph storage is less than 80 percent
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description CEPH_storage-usage
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!ceph_placement_group_degrade_pct_high!CRITICAL- ceph cluster PGs down are more than 80 percent!OK- ceph PG degradation is less than 80 percent
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description CEPH_PGs-degradation
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!ceph_osd_down!CRITICAL- One or more CEPH OSDs are down for more than 5 minutes!OK- All the CEPH OSDs are up
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description CEPH_OSDs-down
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert_with_labels!node_ntp_clock_skew_high!ceph-mon="enabled"!CRITICAL- CEPH clock skew is more than 2 seconds!OK- CEPH clock skew is less than 2 seconds
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description CEPH_Clock-skew
        use notifying_service
      }
      
  • conf.nagios.objects.kubernetes.template

    • Type: string

    • Description:

    • define service {
        check_command check_prom_alert!prom_exporter_calico_unavailable!CRITICAL- Calico exporter is not collecting metrics for alerting!OK- Calico exporter metrics are available.
        hostgroup_name prometheus-hosts
        service_description Prometheus-exporter_Calico
        use generic-service
      }
      
      define service {
        check_command check_prom_alert!prom_exporter_kube_state_metrics_unavailable!CRITICAL- kube-state-metrics exporter is not collecting metrics for alerting!OK- kube-state-metrics exporter metrics are available.
        hostgroup_name prometheus-hosts
        service_description Prometheus-exporter_Kube-state-metrics
        use generic-service
      }
      
      define service {
        check_command check_prom_alert!K8SNodesNotReady!CRITICAL- One or more nodes are not ready.
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Nodes_health
        use generic-service
      }
      
      define service {
        check_command check_prom_alert_with_labels!kube_statefulset_replicas_unavailable!statefulset="prometheus"!statefulset {statefulset} has fewer than configured replicas
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Prometheus_replica-count
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert_with_labels!kube_statefulset_replicas_unavailable!statefulset="alertmanager"!statefulset {statefulset} has fewer than configured replicas
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description PrometheusAlertmanager_replica-count
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!kube_statefulset_replicas_unavailable!CRITICAL- statefulset {statefulset} has fewer than configured replicas!OK- All statefulsets have the configured number of replicas
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Statefulset_replica-count
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!daemonsets_misscheduled!CRITICAL- Daemonset {daemonset} is incorrectly scheduled!OK- No daemonset misscheduling detected
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Daemonset_misscheduled
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!daemonsets_not_scheduled!CRITICAL- Daemonset {daemonset} is not scheduled on some nodes!OK- All daemonset scheduling is as desired
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Daemonset_not-scheduled
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!daemonset_pods_unavailable!CRITICAL- Daemonset {daemonset} has pods unavailable!OK- All daemonset pods available
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Daemonset_pods-unavailable
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!deployment_replicas_unavailable!CRITICAL- Deployment {deployment} has less than desired replicas!OK- All deployments have desired replicas
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Deployment_replicas-unavailable
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!volume_claim_capacity_high_utilization!CRITICAL- Volume claim {persistentvolumeclaim} has exceeded 80% utilization!OK- All volume claims less than 80% utilization
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Volume_claim_high_utilization
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!rollingupdate_deployment_replica_less_than_spec_max_unavailable!CRITICAL- Deployment {deployment} has less than desired replicas during a rolling update!OK- All deployments have desired replicas
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description RollingUpdate_Deployment-replicas-unavailable
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!job_status_failed!CRITICAL- Job {exported_job} has failed!OK- No Job failures
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Job_status-failed
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!pod_status_pending!CRITICAL- Pod {pod} in namespace {namespace} has been in pending status for more than 10 minutes!OK- No pods in pending status
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Pod_status-pending
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!pod_status_error_image_pull!CRITICAL- Pod {pod} in namespace {namespace} has been in error status of ErrImagePull for more than 10 minutes!OK- No pods in error status
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Pod_status-error-image-pull
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!pod_status_error_image_pull_backoff!CRITICAL- Pod {pod} in namespace {namespace} has been in error status of ImagePullBackOff for more than 10 minutes!OK- No pods in error status
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Pod_status-error-image-pull-backoff
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!pod_error_config_error!CRITICAL- Pod {pod} in namespace {namespace} has been in error status of CreateContainerConfigError for more than 10 minutes!OK- No pods in error status
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Pod_status-error-config-error
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!pod_error_crash_loop_back_off!CRITICAL- Pod {pod} in namespace {namespace} has been in error status of CrashLoopBackOff for more than 10 minutes!OK- No pods in crashLoopBackOff status
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Pod_status-crashLoopBackOff
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!replicaset_missing_replicas!CRITICAL- Replicaset {replicaset} is missing replicas!OK- No replicas missing from replicaset
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Replicaset_missing-replicas
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!pod_container_terminated!CRITICAL- pod {pod} in namespace {namespace} has a container in terminated state!OK- pod container status looks good
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description Pod_status-container-terminated
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert_with_labels!etcd_HighNumberOfFailedHTTPRequests!method="DELETE"!CRITICAL- ETCD {instance} has a high HTTP DELETE operations failure!OK- ETCD at {instance} has low or no failures for HTTP DELETE
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description ETCD_high-http-delete-failures
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert_with_labels!etcd_HighNumberOfFailedHTTPRequests!method=~"GET|QGET"!CRITICAL- ETCD {instance} has a high HTTP GET operations failure!OK- ETCD at {instance} has low or no failures for HTTP GET
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description ETCD_high-http-get-failures
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert_with_labels!etcd_HighNumberOfFailedHTTPRequests!method="PUT"!CRITICAL- ETCD {instance} has a high HTTP PUT operations failure!OK- ETCD at {instance} has low or no failures for HTTP PUT
        check_interval 60
        hostgroup_name prometheus-hosts
        service_description ETCD_high-http-update-failures
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!calico_iptable_save_errors_high_1h!CRITICAL- Felix instance {instance} has seen high iptable save errors within the last hour!OK- iptables save errors are none or low
        hostgroup_name prometheus-hosts
        service_description Calico_iptables-save-errors
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!calico_ipset_errors_high_1h!CRITICAL- Felix instance {instance} has seen high ipset errors within the last hour!OK- ipset errors are none or low
        hostgroup_name prometheus-hosts
        service_description Calico_ipset-errors
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!calico_datapane_iface_msg_batch_size_high_5m!CRITICAL- Felix instance {instance} has seen a high value of dataplane interface message batch size!OK- dataplane interface message batch size are low
        hostgroup_name prometheus-hosts
        service_description Calico_interface-message-batch-size
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!calico_datapane_address_msg_batch_size_high_5m!CRITICAL- Felix instance {instance} has seen a high value of dataplane address message batch size!OK- dataplane address message batch size are low
        hostgroup_name prometheus-hosts
        service_description Calico_address-message-batch-size
        use notifying_service
      }
      
      define service {
        check_command check_prom_alert!calico_datapane_failures_high_1h!CRITICAL- Felix instance {instance} has seen high dataplane failures within the last hour!OK- dataplane failures are none or low
        hostgroup_name prometheus-hosts
        service_description Calico_datapane_failures_high
        use notifying_service
      }
      
  • conf.nagios.objects.node.template

    • Type: string

    • Description:

    • define service {
        check_command check_prom_alert!prom_exporter_node_unavailable!CRITICAL- Node exporter is not collecting metrics for alerting!OK- Node exporter metrics are available.
        hostgroup_name prometheus-hosts
        service_description Prometheus-exporter_Node
        use generic-service
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_filesystem_full_in_4h' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Mountpoint {mountpoint} will be full in four hours' --ok_message 'OK- All mountpoints usage rate is normal'
        command_name check_filespace_mounts-usage-rate-fullin4hrs
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_filesystem_full_80percent' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Mountpoint {mountpoint} is more than 80 percent full' --ok_message 'OK- All mountpoints usage is normal'
        command_name check_filespace_mounts-usage
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_load1_90percent' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Node load average has been more than 90% for the past hour' --ok_message 'OK- Node load average is normal'
        command_name check_node_loadavg
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_cpu_util_90percent' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Node CPU utilization has been more than 90% for the past hour' --ok_message 'OK- Node cpu utilization is normal'
        command_name check_node_cpu_util
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_network_conntrack_usage_80percent' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Node network connections are more than 90% in use' --ok_message 'OK- Network connection utilization is normal'
        command_name check_network_connections
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_high_memory_load' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Node memory usage is more than 85%' --ok_message 'OK- Node memory usage is less than 85%'
        command_name check_memory_usage
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_disk_write_latency' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Disk write latency is high' --ok_message 'OK- Node disk write latency is normal'
        command_name check_disk_write_latency
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_disk_read_latency' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Disk read latency is high' --ok_message 'OK- Node disk read latency is normal'
        command_name check_disk_read_latency
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_entropy_available_low' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- System has low entropy availability' --ok_message 'OK- System entropy availability is sufficient'
        command_name check_entropy_availability
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_filedescriptors_full_in_3h' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- at current consumption rate no free file descriptors will be available in 3hrs.' --ok_message 'OK- System file descriptor consumption is ok.'
        command_name check_filedescriptor_usage_rate
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_hwmon_high_cpu_temp' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- CPU temperature is 90 percent of critical temperature.' --ok_message 'OK- CPU temperatures are normal.'
        command_name check_hwmon_high_cpu_temp
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_high_network_drop_rcv' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Host system has an unusually high drop in network reception.' --ok_message 'OK- network packet receive drops not high.'
        command_name check_network_receive_drop_high
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_high_network_drop_send' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Host system has an unusually high drop in network transmission.' --ok_message 'OK- network packet transmit drops not high.'
        command_name check_network_transmit_drop_high
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_high_network_errs_rcv' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Host system has an unusually high error rate in network reception.' --ok_message 'OK- network reception errors not high.'
        command_name check_network_receive_errors_high
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_high_network_errs_send' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Host system has an unusually high error rate in network transmission.' --ok_message 'OK- network transmission errors not high.'
        command_name check_network_transmit_errors_high
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_vmstat_paging_rate_high' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- Memory paging rate over 5 minutes is high.' --ok_message 'OK- Memory paging rate over 5 minutes is ok.'
        command_name check_vmstat_paging_rate
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_xfs_block_allocation_high' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- XFS block allocation is more than 80 percent of available.' --ok_message 'OK- XFS block allocation is less than 80 percent of available.'
        command_name check_xfs_block_allocation
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_network_bond_slaves_down' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- {master} is missing slave interfaces.' --ok_message 'OK- Network bonds have slave interfaces functional.'
        command_name check_network_bond_status
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_numa_memory_used' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- NUMA memory usage is more than 80 percent of available.' --ok_message 'OK- NUMA memory usage is normal.'
        command_name check_numa_memory_usage
      }
      
      define command {
        command_line $USER1$/query_prometheus_alerts.py --prometheus_api $USER2$ --alertname 'node_ntp_clock_skew_high' --labels_csv 'instance=~"$HOSTADDRESS$.*"' --msg_format 'CRITICAL- NTP clock skew is more than 2 seconds.' --ok_message 'OK- NTP clock skew is less than 2 seconds.'
        command_name check_ntp_sync
      }
      
      define service {
        check_command check_filespace_mounts-usage-rate-fullin4hrs
        check_interval 60
        hostgroup_name base-os
        service_description Filespace_mounts-usage-rate-fullin4hrs
        use notifying_service
      }
      
      define service {
        check_command check_filespace_mounts-usage
        check_interval 60
        hostgroup_name base-os
        service_description Filespace_mounts-usage
        use notifying_service
      }
      
      define service {
        check_command check_node_loadavg
        hostgroup_name base-os
        service_description CPU_Load-average
        use notifying_service
      }
      
      define service {
        check_command check_node_cpu_util
        hostgroup_name base-os
        service_description CPU_utilization
        use notifying_service
      }
      
      define service {
        check_command check_network_connections
        hostgroup_name base-os
        service_description Network_connections
        use notifying_service
      }
      
      define service {
        check_command check_memory_usage
        hostgroup_name base-os
        service_description Memory_usage
        use notifying_service
      }
      
      define service {
        check_command check_disk_write_latency
        hostgroup_name base-os
        service_description Disk_write-latency
        use notifying_service
      }
      
      define service {
        check_command check_disk_read_latency
        hostgroup_name base-os
        service_description Disk_read-latency
        use notifying_service
      }
      
      define service {
        check_command check_entropy_availability
        hostgroup_name base-os
        service_description Entropy_availability
        use notifying_service
      }
      
      define service {
        check_command check_filedescriptor_usage_rate
        hostgroup_name base-os
        service_description FileDescriptors_usage-rate-high
        use notifying_service
      }
      
      define service {
        check_command check_hwmon_high_cpu_temp
        hostgroup_name base-os
        service_description HW_cpu-temp-high
        use notifying_service
      }
      
      define service {
        check_command check_network_receive_drop_high
        hostgroup_name base-os
        service_description Network_receive-drop-high
        use notifying_service
      }
      
      define service {
        check_command check_network_transmit_drop_high
        hostgroup_name base-os
        service_description Network_transmit-drop-high
        use notifying_service
      }
      
      define service {
        check_command check_network_receive_errors_high
        hostgroup_name base-os
        service_description Network_receive-errors-high
        use notifying_service
      }
      
      define service {
        check_command check_network_transmit_errors_high
        hostgroup_name base-os
        service_description Network_transmit-errors-high
        use notifying_service
      }
      
      define service {
        check_command check_vmstat_paging_rate
        hostgroup_name base-os
        service_description Memory_vmstat-paging-rate
        use notifying_service
      }
      
      define service {
        check_command check_xfs_block_allocation
        hostgroup_name base-os
        service_description XFS_block-allocation
        use notifying_service
      }
      
      define service {
        check_command check_network_bond_status
        hostgroup_name base-os
        service_description Network_bondstatus
        use notifying_service
      }
      
      define service {
        check_command check_numa_memory_usage
        hostgroup_name base-os
        service_description Memory_NUMA-usage
        use notifying_service
      }
      
      define service {
        check_command check_ntp_sync
        hostgroup_name base-os
        service_description NTP_sync
        use notifying_service
      }
      
  • conf.nagios.query_es_clauses

    • Type: string

    • Description:

    • nil

  • dependencies.dynamic.common.jobs[0]

    • Type: string

    • Description:

    • “nagios-image-repo-sync”

  • dependencies.dynamic.common.services[0].endpoint

    • Type: string

    • Description:

    • “node”

  • dependencies.dynamic.common.services[0].service

    • Type: string

    • Description:

    • “local_image_registry”

  • dependencies.static.image_repo_sync.services[0].endpoint

    • Type: string

    • Description:

    • “internal”

  • dependencies.static.image_repo_sync.services[0].service

    • Type: string

    • Description:

    • “local_image_registry”

  • dependencies.static.nagios.services

    • Type: string

    • Description:

    • nil

  • dependencies.static.tests.services[0].endpoint

    • Type: string

    • Description:

    • “internal”

  • dependencies.static.tests.services[0].service

    • Type: string

    • Description:

    • “nagios”

  • endpoints.ceph_mgr.host_fqdn_override.default

    • Type: string

    • Description:

    • nil

  • endpoints.ceph_mgr.hosts.default

    • Type: string

    • Description:

    • “ceph-mgr”

  • endpoints.ceph_mgr.namespace

    • Type: string

    • Description:

    • nil

  • endpoints.ceph_mgr.port.metrics.default

    • Type: int

    • Description:

    • 9283

  • endpoints.ceph_mgr.port.mgr.default

    • Type: int

    • Description:

    • 7000

  • endpoints.ceph_mgr.scheme.default

    • Type: string

    • Description:

    • “http”

  • endpoints.cluster_domain_suffix

    • Type: string

    • Description:

    • “cluster.local”

  • endpoints.elasticsearch.auth.admin.password

    • Type: string

    • Description:

    • “changeme”

  • endpoints.elasticsearch.auth.admin.username

    • Type: string

    • Description:

    • “admin”

  • endpoints.elasticsearch.host_fqdn_override.default

    • Type: string

    • Description:

    • nil

  • endpoints.elasticsearch.hosts.default

    • Type: string

    • Description:

    • “elasticsearch-logging”

  • endpoints.elasticsearch.name

    • Type: string

    • Description:

    • “elasticsearch”

  • endpoints.elasticsearch.namespace

    • Type: string

    • Description:

    • nil

  • endpoints.elasticsearch.path.default

    • Type: string

    • Description:

    • “/”

  • endpoints.elasticsearch.port.http.default

    • Type: int

    • Description:

    • 80

  • endpoints.elasticsearch.scheme.default

    • Type: string

    • Description:

    • “http”
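
The Elasticsearch admin credentials default to admin/changeme and should be overridden in any real deployment. A minimal sketch (the password is a placeholder):

    endpoints:
      elasticsearch:
        auth:
          admin:
            username: admin
            password: a-strong-password   # placeholder, replace appropriately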

  • endpoints.ldap.auth.admin.bind

    • Type: string

    • Description:

    • “cn=admin,dc=cluster,dc=local”

  • endpoints.ldap.auth.admin.password

    • Type: string

    • Description:

    • “password”

  • endpoints.ldap.host_fqdn_override.default

    • Type: string

    • Description:

    • nil

  • endpoints.ldap.hosts.default

    • Type: string

    • Description:

    • “ldap”

  • endpoints.ldap.path.default

    • Type: string

    • Description:

    • “/ou=People,dc=cluster,dc=local”

  • endpoints.ldap.port.ldap.default

    • Type: int

    • Description:

    • 389

  • endpoints.ldap.scheme.default

    • Type: string

    • Description:

    • “ldap”
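
These LDAP values feed the AuthLDAPBindDN, AuthLDAPBindPassword, and AuthLDAPURL directives in the conf.httpd template above. A sketch pointing them at a site-specific directory (the DN, path, and password are illustrative):

    endpoints:
      ldap:
        auth:
          admin:
            bind: cn=admin,dc=example,dc=org
            password: a-strong-password
        path:
          default: /ou=People,dc=example,dc=org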

  • endpoints.local_image_registry.host_fqdn_override.default

    • Type: string

    • Description:

    • nil

  • endpoints.local_image_registry.hosts.default

    • Type: string

    • Description:

    • “localhost”

  • endpoints.local_image_registry.hosts.internal

    • Type: string

    • Description:

    • “docker-registry”

  • endpoints.local_image_registry.hosts.node

    • Type: string

    • Description:

    • “localhost”

  • endpoints.local_image_registry.name

    • Type: string

    • Description:

    • “docker-registry”

  • endpoints.local_image_registry.namespace

    • Type: string

    • Description:

    • “docker-registry”

  • endpoints.local_image_registry.port.registry.node

    • Type: int

    • Description:

    • 5000

  • endpoints.monitoring.auth.admin.password

    • Type: string

    • Description:

    • “changeme”

  • endpoints.monitoring.auth.admin.secret.tls.internal

    • Type: string

    • Description:

    • “prometheus-tls-api”

  • endpoints.monitoring.auth.admin.username

    • Type: string

    • Description:

    • “admin”

  • endpoints.monitoring.host_fqdn_override.default

    • Type: string

    • Description:

    • nil

  • endpoints.monitoring.hosts.default

    • Type: string

    • Description:

    • “prom-metrics”

  • endpoints.monitoring.hosts.public

    • Type: string

    • Description:

    • “prometheus”

  • endpoints.monitoring.name

    • Type: string

    • Description:

    • “prometheus”

  • endpoints.monitoring.path.default

    • Type: string

    • Description:

    • nil

  • endpoints.monitoring.port.http.default

    • Type: int

    • Description:

    • 80

  • endpoints.monitoring.scheme.default

    • Type: string

    • Description:

    • “http”

  • endpoints.nagios.auth.admin.password

    • Type: string

    • Description:

    • “password”

  • endpoints.nagios.auth.admin.username

    • Type: string

    • Description:

    • “nagiosadmin”

  • endpoints.nagios.host_fqdn_override.default

    • Type: string

    • Description:

    • nil

  • endpoints.nagios.hosts.default

    • Type: string

    • Description:

    • “nagios-metrics”

  • endpoints.nagios.hosts.public

    • Type: string

    • Description:

    • “nagios”

  • endpoints.nagios.name

    • Type: string

    • Description:

    • “nagios”

  • endpoints.nagios.namespace

    • Type: string

    • Description:

    • nil

  • endpoints.nagios.path.default

    • Type: string

    • Description:

    • nil

  • endpoints.nagios.port.http.default

    • Type: int

    • Description:

    • 80

  • endpoints.nagios.port.nagios.default

    • Type: int

    • Description:

    • 8000

  • endpoints.nagios.scheme.default

    • Type: string

    • Description:

    • “http”
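
A typical override changes the default Nagios admin credentials and, where ingress is used, sets a site FQDN via host_fqdn_override (the hostname is illustrative):

    endpoints:
      nagios:
        auth:
          admin:
            username: nagiosadmin
            password: a-strong-password
        host_fqdn_override:
          default: nagios.example.com   # illustrative hostname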

  • endpoints.oci_image_registry.auth.enabled

    • Type: bool

    • Description:

    • false

  • endpoints.oci_image_registry.auth.nagios.password

    • Type: string

    • Description:

    • “password”

  • endpoints.oci_image_registry.auth.nagios.username

    • Type: string

    • Description:

    • “nagios”

  • endpoints.oci_image_registry.host_fqdn_override.default

    • Type: string

    • Description:

    • nil

  • endpoints.oci_image_registry.hosts.default

    • Type: string

    • Description:

    • “localhost”

  • endpoints.oci_image_registry.name

    • Type: string

    • Description:

    • “oci-image-registry”

  • endpoints.oci_image_registry.namespace

    • Type: string

    • Description:

    • “oci-image-registry”

  • endpoints.oci_image_registry.port.registry.default

    • Type: string

    • Description:

    • nil

  • images.local_registry.active

    • Type: bool

    • Description:

    • false

  • images.local_registry.exclude[0]

    • Type: string

    • Description:

    • “dep_check”

  • images.local_registry.exclude[1]

    • Type: string

    • Description:

    • “image_repo_sync”

  • images.pull_policy

    • Type: string

    • Description:

    • “IfNotPresent”

  • images.tags.apache_proxy

    • Type: string

    • Description:

    • “docker.io/library/httpd:2.4”

  • images.tags.dep_check

    • Type: string

    • Description:

    • “quay.io/stackanetes/kubernetes-entrypoint:v0.2.1”

  • images.tags.image_repo_sync

    • Type: string

    • Description:

    • “docker.io/library/docker:17.07.0”

  • images.tags.nagios

    • Type: string

    • Description:

    • “docker.io/openstackhelm/nagios:latest-ubuntu_jammy”

  • images.tags.selenium_tests

    • Type: string

    • Description:

    • “docker.io/openstackhelm/osh-selenium:latest-ubuntu_jammy”
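
Image references are plain values, so pinning or mirroring is an override away. The sketch below restates the documented defaults; swap in pinned tags or a local registry as appropriate:

    images:
      pull_policy: IfNotPresent
      tags:
        nagios: docker.io/openstackhelm/nagios:latest-ubuntu_jammy
        apache_proxy: docker.io/library/httpd:2.4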

  • labels.job.node_selector_key

    • Type: string

    • Description:

    • “openstack-control-plane”

  • labels.job.node_selector_value

    • Type: string

    • Description:

    • “enabled”

  • labels.nagios.node_selector_key

    • Type: string

    • Description:

    • “openstack-control-plane”

  • labels.nagios.node_selector_value

    • Type: string

    • Description:

    • “enabled”

  • labels.test.node_selector_key

    • Type: string

    • Description:

    • “openstack-control-plane”

  • labels.test.node_selector_value

    • Type: string

    • Description:

    • “enabled”

  • manifests.certificates

    • Type: bool

    • Description:

    • false

  • manifests.configmap_additional_plugins

    • Type: bool

    • Description:

    • false

  • manifests.configmap_bin

    • Type: bool

    • Description:

    • true

  • manifests.configmap_etc

    • Type: bool

    • Description:

    • true

  • manifests.deployment

    • Type: bool

    • Description:

    • true

  • manifests.ingress

    • Type: bool

    • Description:

    • true

  • manifests.job_image_repo_sync

    • Type: bool

    • Description:

    • true

  • manifests.network_policy

    • Type: bool

    • Description:

    • false

  • manifests.pod_helm_test

    • Type: bool

    • Description:

    • true

  • manifests.secret_ingress_tls

    • Type: bool

    • Description:

    • true

  • manifests.secret_nagios

    • Type: bool

    • Description:

    • true

  • manifests.secret_registry

    • Type: bool

    • Description:

    • true

  • manifests.service

    • Type: bool

    • Description:

    • true

  • manifests.service_ingress

    • Type: bool

    • Description:

    • true
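
Each manifests flag gates whether the corresponding Kubernetes object is rendered at all. For example, to render a NetworkPolicy and drop the public ingress (the combination is illustrative):

    manifests:
      network_policy: true    # pair with network_policy.nagios rules below
      ingress: false
      service_ingress: false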

  • network.nagios.ingress.annotations.“nginx.ingress.kubernetes.io/affinity”

    • Type: string

    • Description:

    • “cookie”

  • network.nagios.ingress.annotations.“nginx.ingress.kubernetes.io/configuration-snippet”

    • Type: string

    • Description:

    • more_set_headers "X-Content-Type-Options: 'nosniff'";
      more_set_headers "X-Frame-Options: SAMEORIGIN";
      more_set_headers "Content-Security-Policy: script-src 'self'";
      more_set_headers "X-XSS-Protection: 1; mode=block";
      
  • network.nagios.ingress.annotations.“nginx.ingress.kubernetes.io/rewrite-target”

    • Type: string

    • Description:

    • “/”

  • network.nagios.ingress.annotations.“nginx.ingress.kubernetes.io/session-cookie-expires”

    • Type: string

    • Description:

    • “600”

  • network.nagios.ingress.annotations.“nginx.ingress.kubernetes.io/session-cookie-hash”

    • Type: string

    • Description:

    • “sha1”

  • network.nagios.ingress.annotations.“nginx.ingress.kubernetes.io/session-cookie-max-age”

    • Type: string

    • Description:

    • “600”

  • network.nagios.ingress.annotations.“nginx.ingress.kubernetes.io/session-cookie-name”

    • Type: string

    • Description:

    • “kube-ingress-session-nagios”

  • network.nagios.ingress.classes.cluster

    • Type: string

    • Description:

    • “nginx-cluster”

  • network.nagios.ingress.classes.namespace

    • Type: string

    • Description:

    • “nginx”

  • network.nagios.ingress.public

    • Type: bool

    • Description:

    • true

  • network.nagios.node_port.enabled

    • Type: bool

    • Description:

    • false

  • network.nagios.node_port.port

    • Type: int

    • Description:

    • 30925
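
To expose Nagios on a NodePort instead of (or alongside) the ingress, enable node_port; 30925 is the documented default port:

    network:
      nagios:
        node_port:
          enabled: true
          port: 30925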

  • network_policy.nagios.egress[0]

    • Type: object

    • Description:

    • {}

  • network_policy.nagios.ingress[0]

    • Type: object

    • Description:

    • {}

  • pod.affinity.anti.topologyKey.default

    • Type: string

    • Description:

    • “kubernetes.io/hostname”

  • pod.affinity.anti.type.default

    • Type: string

    • Description:

    • “preferredDuringSchedulingIgnoredDuringExecution”

  • pod.affinity.anti.weight.default

    • Type: int

    • Description:

    • 10

  • pod.lifecycle.termination_grace_period.nagios.timeout

    • Type: int

    • Description:

    • 30

  • pod.lifecycle.upgrades.deployments.pod_replacement_strategy

    • Type: string

    • Description:

    • “RollingUpdate”

  • pod.lifecycle.upgrades.deployments.revision_history

    • Type: int

    • Description:

    • 3

  • pod.lifecycle.upgrades.deployments.rolling_update.max_surge

    • Type: int

    • Description:

    • 3

  • pod.lifecycle.upgrades.deployments.rolling_update.max_unavailable

    • Type: int

    • Description:

    • 1

  • pod.probes.monitoring.apache_proxy.readiness.enabled

    • Type: bool

    • Description:

    • true

  • pod.probes.monitoring.apache_proxy.readiness.params.initialDelaySeconds

    • Type: int

    • Description:

    • 20

  • pod.probes.monitoring.apache_proxy.readiness.params.periodSeconds

    • Type: int

    • Description:

    • 10

  • pod.probes.monitoring.nagios.readiness.enabled

    • Type: bool

    • Description:

    • true

  • pod.probes.monitoring.nagios.readiness.params.initialDelaySeconds

    • Type: int

    • Description:

    • 60

  • pod.probes.monitoring.nagios.readiness.params.periodSeconds

    • Type: int

    • Description:

    • 30

  • pod.probes.monitoring.nagios.readiness.params.timeoutSeconds

    • Type: int

    • Description:

    • 10

  • pod.replicas.nagios

    • Type: int

    • Description:

    • 1

  • pod.resources.apache_proxy.limits.cpu

    • Type: string

    • Description:

    • “2000m”

  • pod.resources.apache_proxy.limits.memory

    • Type: string

    • Description:

    • “1024Mi”

  • pod.resources.apache_proxy.requests.cpu

    • Type: string

    • Description:

    • “100m”

  • pod.resources.apache_proxy.requests.memory

    • Type: string

    • Description:

    • “128Mi”

  • pod.resources.enabled

    • Type: bool

    • Description:

    • false

  • pod.resources.jobs.image_repo_sync.limits.cpu

    • Type: string

    • Description:

    • “2000m”

  • pod.resources.jobs.image_repo_sync.limits.memory

    • Type: string

    • Description:

    • “1024Mi”

  • pod.resources.jobs.image_repo_sync.requests.cpu

    • Type: string

    • Description:

    • “100m”

  • pod.resources.jobs.image_repo_sync.requests.memory

    • Type: string

    • Description:

    • “128Mi”

  • pod.resources.jobs.tests.limits.cpu

    • Type: string

    • Description:

    • “2000m”

  • pod.resources.jobs.tests.limits.memory

    • Type: string

    • Description:

    • “1024Mi”

  • pod.resources.jobs.tests.requests.cpu

    • Type: string

    • Description:

    • “100m”

  • pod.resources.jobs.tests.requests.memory

    • Type: string

    • Description:

    • “128Mi”

  • pod.resources.nagios.limits.cpu

    • Type: string

    • Description:

    • “2000m”

  • pod.resources.nagios.limits.memory

    • Type: string

    • Description:

    • “1024Mi”

  • pod.resources.nagios.requests.cpu

    • Type: string

    • Description:

    • “100m”

  • pod.resources.nagios.requests.memory

    • Type: string

    • Description:

    • “128Mi”
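
Resource requests and limits take effect only when pod.resources.enabled is true; the figures below restate the documented defaults:

    pod:
      resources:
        enabled: true
        nagios:
          requests:
            cpu: 100m
            memory: 128Mi
          limits:
            cpu: 2000m
            memory: 1024Mi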

  • pod.security_context.monitoring.container.apache_proxy.readOnlyRootFilesystem

    • Type: bool

    • Description:

    • false

  • pod.security_context.monitoring.container.define_nagios_hosts.readOnlyRootFilesystem

    • Type: bool

    • Description:

    • false

  • pod.security_context.monitoring.container.helm_tests.readOnlyRootFilesystem

    • Type: bool

    • Description:

    • true

  • pod.security_context.monitoring.container.nagios.readOnlyRootFilesystem

    • Type: bool

    • Description:

    • false

  • pod.security_context.monitoring.pod.runAsUser

    • Type: int

    • Description:

    • 0

  • secrets.nagios.admin

    • Type: string

    • Description:

    • “nagios-admin-creds”

  • secrets.oci_image_registry.nagios

    • Type: string

    • Description:

    • “nagios-oci-image-registry-key”

  • secrets.tls.nagios.nagios.public

    • Type: string

    • Description:

    • “nagios-tls-public”

  • selenium_v4

    • Type: bool

    • Description:

    • true