commit 9c25d435a3c2c01aaf055df816ed795ad4b44816 Author: Tee Ngo Date: Mon Sep 28 12:07:14 2020 -0400 Increase max_pool_size for dc audits Increase max_pool_size for dcorch and dcmanager audits to avoid database thrashing with connect/disconnect requests resulting in a sharp CPU spike caused by postgres on every dcorch/dcmanager audit cycle. The CPU spike is magnified when both dcorch and dcmanager audits happen to run at the same time, which can impact resource-intensive operations such as batch subcloud deployment. A low max_pool_size setting makes sense for on-demand services such as fm, not for services that perform regular audits. These settings will be re-assessed and adjusted when all DC scalability-related features are complete. Closes-Bug: 1895605 Change-Id: I138faa640933bd255d7ae90d3388733f35431e4d Signed-off-by: Tee Ngo diff --git a/modules/puppet-dcmanager/src/dcmanager/manifests/init.pp b/modules/puppet-dcmanager/src/dcmanager/manifests/init.pp index 325d072..af5ea45 100644 --- a/modules/puppet-dcmanager/src/dcmanager/manifests/init.pp +++ b/modules/puppet-dcmanager/src/dcmanager/manifests/init.pp @@ -1,7 +1,7 @@ # # Files in this package are licensed under Apache; see LICENSE file. # -# Copyright (c) 2013-2016 Wind River Systems, Inc. +# Copyright (c) 2013-2020 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -22,7 +22,7 @@ class dcmanager ( $database_connection = '', $database_idle_timeout = 3600, - $database_max_pool_size = 1, + $database_max_pool_size = 105, $database_max_overflow = 100, $control_exchange = 'openstack', $rabbit_host = '127.0.0.1', diff --git a/modules/puppet-dcorch/src/dcorch/manifests/init.pp b/modules/puppet-dcorch/src/dcorch/manifests/init.pp index 62a1bb7..8ca67a3 100644 --- a/modules/puppet-dcorch/src/dcorch/manifests/init.pp +++ b/modules/puppet-dcorch/src/dcorch/manifests/init.pp @@ -1,7 +1,7 @@ # # Files in this package are licensed under Apache; see LICENSE file. 
# -# Copyright (c) 2013-2018 Wind River Systems, Inc. +# Copyright (c) 2013-2020 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -22,8 +22,8 @@ class dcorch ( $database_connection = '', $database_idle_timeout = 3600, - $database_max_pool_size = 1, - $database_max_overflow = 500, + $database_max_pool_size = 405, + $database_max_overflow = 100, $control_exchange = 'openstack', $rabbit_host = '127.0.0.1', $rabbit_port = 5672, diff --git a/puppet-manifests/src/modules/platform/manifests/postgresql.pp b/puppet-manifests/src/modules/platform/manifests/postgresql.pp index ab54fb5..59c6435 100644 --- a/puppet-manifests/src/modules/platform/manifests/postgresql.pp +++ b/puppet-manifests/src/modules/platform/manifests/postgresql.pp @@ -57,22 +57,14 @@ class platform::postgresql::server ( value => '1000', } - # Set large values for postgres in normal mode - # In AIO or virtual box, use reduced settings + # Set large values for postgres in standard or system controller. + # In AIO or virtual box, use reduced settings. # - - # Normal mode - # 1500 connections - # 80 MB shared buffer - # work_mem 512 MB since some ceilometer queries entail extensive - # TODO: with ceilometer removed, determine if work_mem can be revisited - # sorting as well as hash joins and hash based aggregation. 
- # checkpoint_segments increased to reduce frequency of checkpoints - if str2bool($::is_worker_subfunction) or str2bool($::is_virtual) { - # AIO or virtual box - # 700 connections needs about 80MB shared buffer - # Leave work_mem as the default for vbox and AIO - # Leave checkpoint_segments as the default for vbox and AIO + if ((str2bool($::is_worker_subfunction) and + ($::platform::params::distributed_cloud_role !='systemcontroller')) or + (str2bool($::is_virtual))) { + # Non system controller AIO or virtual box + # 700 connections, 80MB shared_buffers postgresql::server::config_entry { 'max_connections': value => '700', } @@ -80,6 +72,14 @@ class platform::postgresql::server ( value => '80MB', } } else { + # System controller or standard controller + # 1500 connections, 80MB shared_buffers, increase work_mem and + # checkpoint_segments + # TODO: + # - re-assess work_mem setting considering the complexity of the current + # queries. + # - re-assess shared_buffers setting for the system controller in a large + # distributed cloud. postgresql::server::config_entry { 'max_connections': value => '1500', }