提交 94a08704 编写于 作者: N Ning Yu 提交者: GitHub

Fix resource group memory overuse issue when increasing concurrency.

A resource group may overuse memory in the case below:

	CREATE RESOURCE GROUP rg_concurrency_test WITH
	(concurrency=1, cpu_rate_limit=20, memory_limit=60,
	 memory_shared_quota=0, memory_spill_ratio=10);
	CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test;

	11:SET ROLE role_concurrency_test;
	11:BEGIN;

	21:SET ROLE role_concurrency_test;
	22:SET ROLE role_concurrency_test;
	21&:BEGIN;
	22&:BEGIN;

	ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2;

	11:END;

The cause is that we didn't check the overall memory quota usage in the
past, so pending queries could be woken up as long as the concurrency
limit was not reached. In such a case, if the currently running transactions
have used all the memory quota in the resource group, then the overall
memory usage will be exceeded.

To fix this issue we now check both the concurrency limit and the memory
quota usage to decide whether to wake up pending queries.
Signed-off-by: Zhenghua Lyu <zlv@pivotal.io>
上级 759c19d0
......@@ -374,6 +374,7 @@ InitProcess(void)
MyProc->waitProcLock = NULL;
MyProc->resWaiting = false;
MyProc->resGranted = false;
MyProc->resSlotId = -1;
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
SHMQueueInit(&(MyProc->myProcLocks[i]));
......
......@@ -172,6 +172,7 @@ struct PGPROC
bool resGranted; /* true means a resource group slot is granted.
false when wake up from a resource group which
is locked for drop */
int resSlotId; /* the resource group slot id granted */
};
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
......
......@@ -25,6 +25,8 @@ extern int MaxResourceGroups;
extern double gp_resource_group_cpu_limit;
extern double gp_resource_group_memory_limit;
struct ResGroupConfigSnapshot;
/* Type of statistic information */
typedef enum
{
......@@ -54,7 +56,8 @@ extern void AllocResGroupEntry(Oid groupId);
extern void FreeResGroupEntry(Oid groupId);
extern void SerializeResGroupInfo(StringInfo str);
extern void DeserializeResGroupInfo(const char *buf, int len);
extern void DeserializeResGroupInfo(struct ResGroupConfigSnapshot *config,
const char *buf, int len);
extern bool ShouldAssignResGroupOnMaster(void);
extern void AssignResGroupOnMaster(void);
......
-- create a resource group when gp_resource_manager is queue
DROP ROLE IF EXISTS role_concurrency_test;
DROP
-- start_ignore
DROP RESOURCE GROUP rg_concurrency_test;
ERROR: resource group "rg_concurrency_test" does not exist
-- end_ignore
CREATE RESOURCE GROUP rg_concurrency_test WITH (concurrency=1, cpu_rate_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10);
CREATE
CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test;
CREATE
--
-- increase concurrency after pending queries
--
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1;
ALTER
11:SET ROLE role_concurrency_test;
SET
11:BEGIN;
BEGIN
21:SET ROLE role_concurrency_test;
SET
22:SET ROLE role_concurrency_test;
SET
21&:BEGIN; <waiting ...>
22&:BEGIN; <waiting ...>
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2;
ALTER
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rsgname |waiting_reason|current_query
-------------------+--------------+------------------------------------------------------------------
admin_group | |SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rg_concurrency_test| |<IDLE> in transaction
rg_concurrency_test|resgroup |BEGIN;
rg_concurrency_test|resgroup |BEGIN;
(4 rows)
11:END;
END
11q: ... <quitting>
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rsgname |waiting_reason|current_query
-------------------+--------------+------------------------------------------------------------------
admin_group | |SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rg_concurrency_test| |<IDLE> in transaction
rg_concurrency_test| |<IDLE> in transaction
(3 rows)
21<: <... completed>
BEGIN
22<: <... completed>
BEGIN
21:END;
END
22:END;
END
21q: ... <quitting>
22q: ... <quitting>
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rsgname |waiting_reason|current_query
-----------+--------------+------------------------------------------------------------------
admin_group| |SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
(1 row)
--
-- increase concurrency before pending queries
--
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1;
ALTER
11:SET ROLE role_concurrency_test;
SET
11:BEGIN;
BEGIN
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2;
ALTER
21:SET ROLE role_concurrency_test;
SET
22:SET ROLE role_concurrency_test;
SET
21&:BEGIN; <waiting ...>
22&:BEGIN; <waiting ...>
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rsgname |waiting_reason|current_query
-------------------+--------------+------------------------------------------------------------------
admin_group | |SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rg_concurrency_test| |<IDLE> in transaction
rg_concurrency_test|resgroup |BEGIN;
rg_concurrency_test|resgroup |BEGIN;
(4 rows)
11:END;
END
11q: ... <quitting>
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rsgname |waiting_reason|current_query
-------------------+--------------+------------------------------------------------------------------
admin_group | |SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rg_concurrency_test| |<IDLE> in transaction
rg_concurrency_test| |<IDLE> in transaction
(3 rows)
21<: <... completed>
BEGIN
22<: <... completed>
BEGIN
21:END;
END
22:END;
END
21q: ... <quitting>
22q: ... <quitting>
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
rsgname |waiting_reason|current_query
-----------+--------------+------------------------------------------------------------------
admin_group| |SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;
(1 row)
-- cleanup
DROP ROLE role_concurrency_test;
DROP
DROP RESOURCE GROUP rg_concurrency_test;
DROP
......@@ -101,7 +101,7 @@ ALTER
SELECT r.rsgname, num_running, num_queueing, num_queued, num_executed FROM gp_toolkit.gp_resgroup_status s, pg_resgroup r WHERE s.groupid=r.oid AND r.rsgname='rg_concurrency_test';
rsgname |num_running|num_queueing|num_queued|num_executed
-------------------+-----------+------------+----------+------------
rg_concurrency_test|3 |0 |1 |3
rg_concurrency_test|2 |1 |1 |2
(1 row)
SELECT concurrency,proposed_concurrency FROM gp_toolkit.gp_resgroup_config WHERE groupname='rg_concurrency_test';
concurrency|proposed_concurrency
......
......@@ -19,6 +19,7 @@
! bash /tmp/.resgroup_mem_helper.sh;
! rm -f /tmp/.resgroup_mem_helper.sh;
! gpconfig -c gp_resource_manager -v group;
! gpconfig -c gp_resource_group_cpu_limit -v 0.9;
! gpstop -rai;
-- end_ignore
......
......@@ -5,8 +5,12 @@ test: resgroup_syntax
test: resgroup_transaction
test: resgroup_concurrency
test: resgroup_alter_concurrency
test: resgroup_memory_statistic
test: resgroup_memory_limit
test: resgroup_cpu_rate_limit
# memory spill tests
test: resgroup_memory_hashagg_spill
test: resgroup_memory_hashjoin_spill
test: resgroup_memory_materialize_spill
......@@ -14,6 +18,5 @@ test: resgroup_memory_sisc_mat_sort
test: resgroup_memory_sisc_sort_spill
test: resgroup_memory_sort_spill
test: resgroup_memory_spilltodisk
test: resgroup_cpu_rate_limit
test: disable_resgroup
......@@ -42,6 +42,9 @@
20170502:01:28:12:000367 gpconfig:sdw6:gpadmin-[WARNING]:-Managing queries with resource groups is an experimental feature. A work-in-progress version is enabled.
20170502:01:28:13:000367 gpconfig:sdw6:gpadmin-[INFO]:-completed successfully
! gpconfig -c gp_resource_group_cpu_limit -v 0.9;
20170803:10:42:57:015929 gpconfig:nyu-vm-centos:gpadmin-[INFO]:-completed successfully
! gpstop -rai;
-- end_ignore
......
-- Test: behaviour of queries pending on a resource group while the group's
-- CONCURRENCY is raised from 1 to 2, either after or before they start waiting.
--
-- Session prefixes used below:
--   N:   run the statement in session N and wait for it
--   N&:  run the statement in session N in the background
--   N<:  join session N's background statement and report its result
--   Nq:  quit session N
-- Statements without a prefix run in the default session.
--
-- Setup: drop leftovers from earlier runs, then create a group that admits a
-- single concurrent transaction and a role assigned to that group.
DROP ROLE IF EXISTS role_concurrency_test;
-- start_ignore
DROP RESOURCE GROUP rg_concurrency_test;
-- end_ignore
CREATE RESOURCE GROUP rg_concurrency_test WITH
(concurrency=1, cpu_rate_limit=20, memory_limit=60, memory_shared_quota=0, memory_spill_ratio=10);
CREATE ROLE role_concurrency_test RESOURCE GROUP rg_concurrency_test;

--
-- increase concurrency after pending queries
--

-- Reset the limit to 1 so this scenario starts from a known state.
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1;

-- Session 11 opens a transaction in the group and keeps it open.
11:SET ROLE role_concurrency_test;
11:BEGIN;

-- Sessions 21 and 22 try to open transactions in the same group; they are
-- issued in the background because they are expected to wait.
21:SET ROLE role_concurrency_test;
22:SET ROLE role_concurrency_test;
21&:BEGIN;
22&:BEGIN;

-- Raise the limit while 21 and 22 are pending, then inspect who is waiting.
-- NOTE(review): both pending sessions are expected to still be waiting here,
-- presumably because session 11 still holds the group's memory quota --
-- confirm against the resource group implementation.
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2;
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;

-- End session 11's transaction and quit it; the pending sessions are expected
-- to be running afterwards.
11:END;
11q:
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;

-- Collect the background BEGINs, finish both transactions and quit the
-- sessions so that only the default session remains.
21<:
22<:
21:END;
22:END;
21q:
22q:
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;

--
-- increase concurrency before pending queries
--

-- Same scenario, except that the limit is raised to 2 while only session 11
-- is active, i.e. before sessions 21 and 22 issue their BEGINs.
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 1;
11:SET ROLE role_concurrency_test;
11:BEGIN;
ALTER RESOURCE GROUP rg_concurrency_test SET CONCURRENCY 2;
21:SET ROLE role_concurrency_test;
22:SET ROLE role_concurrency_test;
21&:BEGIN;
22&:BEGIN;

-- Inspect who is waiting while session 11's transaction is still open.
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;

-- End and quit session 11, then check the state of the pending sessions again.
11:END;
11q:
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;

-- Collect the background BEGINs, finish both transactions and quit the
-- sessions so that only the default session remains.
21<:
22<:
21:END;
22:END;
21q:
22q:
SELECT rsgname,waiting_reason,current_query FROM pg_stat_activity;

-- cleanup
DROP ROLE role_concurrency_test;
DROP RESOURCE GROUP rg_concurrency_test;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册