diff --git a/gpMgmt/bin/gpcheckresgroupimpl b/gpMgmt/bin/gpcheckresgroupimpl index 329b803591a02c2af495490206214aa6ff036868..f8de80fe2bb4bb809ac12e6a3c3e650cc64767a9 100755 --- a/gpMgmt/bin/gpcheckresgroupimpl +++ b/gpMgmt/bin/gpcheckresgroupimpl @@ -9,29 +9,26 @@ import psutil from gppylib.commands import gp from gppylib import gpversion +gpverstr = gp.GpVersion.local("", os.getenv("GPHOME")) +gpver = gpversion.GpVersion(gpverstr) + class dummy(object): def validate_all(self): exit("resource group is not supported on this platform") -def detectCgroupMountPoint(): - proc_mounts_path = "/proc/self/mounts" - if os.path.exists(proc_mounts_path): - with open(proc_mounts_path) as f: - for line in f: - mntent = line.split() - if mntent[2] != "cgroup": continue - mount_point = os.path.dirname(mntent[1]) - return mount_point - return "" - class cgroup(object): - mount_point = detectCgroupMountPoint() - tab = { 'r': os.R_OK, 'w': os.W_OK, 'x': os.X_OK, 'f': os.F_OK } - impl = "cgroup" - error_prefix = " is not properly configured: " + def __init__(self): + self.mount_point = self.detect_cgroup_mount_point() + self.tab = { 'r': os.R_OK, 'w': os.W_OK, 'x': os.X_OK, 'f': os.F_OK } + self.impl = "cgroup" + self.error_prefix = " is not properly configured: " + + self.compdirs = self.detect_comp_dirs() + if not self.validate_comp_dirs(): + self.compdirs = self.fallback_comp_dirs() def validate_all(self): """ @@ -44,44 +41,49 @@ class cgroup(object): if not self.mount_point: self.die("failed to detect cgroup mount point.") - self.validate_permission("cpu/gpdb/", "rwx") - self.validate_permission("cpu/gpdb/cgroup.procs", "rw") - self.validate_permission("cpu/gpdb/cpu.cfs_period_us", "rw") - self.validate_permission("cpu/gpdb/cpu.cfs_quota_us", "rw") - self.validate_permission("cpu/gpdb/cpu.shares", "rw") + if not self.compdirs: + self.die("failed to detect cgroup component dirs.") - self.validate_permission("cpuacct/gpdb/", "rwx") - self.validate_permission("cpuacct/gpdb/cgroup.procs", "rw") - self.validate_permission("cpuacct/gpdb/cpuacct.usage", "r") - self.validate_permission("cpuacct/gpdb/cpuacct.stat", "r") + self.validate_permission("cpu", "gpdb/", "rwx") + self.validate_permission("cpu", "gpdb/cgroup.procs", "rw") + self.validate_permission("cpu", "gpdb/cpu.cfs_period_us", "rw") + self.validate_permission("cpu", "gpdb/cpu.cfs_quota_us", "rw") + self.validate_permission("cpu", "gpdb/cpu.shares", "rw") - self.validate_permission("memory/memory.limit_in_bytes", "r") + self.validate_permission("cpuacct", "gpdb/", "rwx") + self.validate_permission("cpuacct", "gpdb/cgroup.procs", "rw") + self.validate_permission("cpuacct", "gpdb/cpuacct.usage", "r") + self.validate_permission("cpuacct", "gpdb/cpuacct.stat", "r") + + self.validate_permission("memory", "memory.limit_in_bytes", "r") # resgroup memory auditor is introduced in 6.0 devel and backported # to 5.x branch since 5.6.1. To provide backward compatibilities # memory permissions are only checked since 6.0. - gpverstr = gp.GpVersion.local("", os.getenv("GPHOME")) - gpver = gpversion.GpVersion(gpverstr) if gpver.version >= [6, 0, 0]: - self.validate_permission("memory/gpdb/", "rwx") - self.validate_permission("memory/gpdb/memory.limit_in_bytes", "rw") - self.validate_permission("memory/gpdb/memory.usage_in_bytes", "r") + self.validate_permission("memory", "gpdb/", "rwx") + self.validate_permission("memory", "gpdb/memory.limit_in_bytes", "rw") + self.validate_permission("memory", "gpdb/memory.usage_in_bytes", "r") - self.validate_permission("cpuset/gpdb/", "rwx") - self.validate_permission("cpuset/gpdb/cgroup.procs", "rw") - self.validate_permission("cpuset/gpdb/cpuset.cpus", "rw") - self.validate_permission("cpuset/gpdb/cpuset.mems", "rw") + self.validate_permission("cpuset", "gpdb/", "rwx") + self.validate_permission("cpuset", "gpdb/cgroup.procs", "rw") + self.validate_permission("cpuset", "gpdb/cpuset.cpus", "rw") + self.validate_permission("cpuset", "gpdb/cpuset.mems", "rw") def die(self, msg): exit(self.impl + self.error_prefix + msg) - def validate_permission(self, path, mode): + def validate_permission(self, comp, path, mode): """ Validate permission on path. If path is a dir it must ends with '/'. """ try: - fullpath = os.path.join(self.mount_point, path) + if comp not in self.compdirs: + self.die("can't find dir of cgroup component '%s'" % (comp)) + + compdir = self.compdirs[comp] + fullpath = os.path.join(self.mount_point, comp, compdir, path) pathtype = path[-1] == "/" and "directory" or "file" modebits = reduce(lambda x, y: x | y, map(lambda x: self.tab[x], mode), 0) @@ -96,6 +98,70 @@ class cgroup(object): self.die("can't check permission on %s '%s': %s" \ % (pathtype, fullpath, str(e))) + def validate_comp_dirs(self): + """ + Validate existance of cgroup component dirs. + + Return True if all the components dir exist and have good permission, + otherwise return False. + """ + + comps = ['cpu', 'cpuacct'] + if gpver.version >= [6, 0, 0]: + comps.extend(['cpuset', 'memory']) + + for comp in comps: + if comp not in self.compdirs: + return False + + compdir = self.compdirs[comp] + fullpath = os.path.join(self.mount_point, comp, compdir, 'gpdb') + + if not os.access(fullpath, os.R_OK | os.W_OK | os.X_OK): + return False + + return True + + def detect_cgroup_mount_point(self): + proc_mounts_path = "/proc/self/mounts" + if os.path.exists(proc_mounts_path): + with open(proc_mounts_path) as f: + for line in f: + mntent = line.split() + if mntent[2] != "cgroup": continue + mount_point = os.path.dirname(mntent[1]) + return mount_point + return "" + + def detect_comp_dirs(self): + compdirs = {} + path = "/proc/1/cgroup" + + if not os.path.exists(path): + return compdirs + + for line in open(path): + line = line.strip() + compid, compnames, comppath = line.split(":") + if not compnames or '=' in compnames: + continue + for compname in compnames.split(','): + compdirs[compname] = comppath.strip(os.path.sep) + + return compdirs + + def required_comps(self): + comps = ['cpu', 'cpuacct'] + if gpver.version >= [6, 0, 0]: + comps.extend(['cpuset', 'memory']) + return comps + + def fallback_comp_dirs(self): + compdirs = {} + for comp in self.required_comps(): + compdirs[comp] = '' + return compdirs + if __name__ == '__main__': if sys.platform.startswith('linux'): cgroup().validate_all() diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py index 9d119c3ae57eed8ab6687754f9e983312ea66b64..4fff4b647a749bdbff758bb8b5c13749aaa52798 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpcheckresgroupimpl.py @@ -16,13 +16,12 @@ import gpcheckresgroupimpl from gppylib.commands import gp from gppylib import gpversion + gpverstr = gp.GpVersion.local("", os.getenv("GPHOME")) gpver = gpversion.GpVersion(gpverstr) @unittest.skipUnless(sys.platform.startswith("linux"), "requires linux") class GpCheckResGroupImplCGroup(unittest.TestCase): - cgroup_mntpnt = None - cgroup_default_mntpnt = gpcheckresgroupimpl.detectCgroupMountPoint() def setUp(self): self.cgroup_mntpnt = tempfile.mkdtemp(prefix='fake-cgroup-mnt-') @@ -35,6 +34,9 @@ class GpCheckResGroupImplCGroup(unittest.TestCase): self.cgroup = gpcheckresgroupimpl.cgroup() self.cgroup.mount_point = self.cgroup_mntpnt self.cgroup.die = self.mock_cgroup_die + self.cgroup.compdirs = self.cgroup.fallback_comp_dirs() + + self.cgroup_default_mntpnt = self.cgroup.detect_cgroup_mount_point() os.mkdir(os.path.join(self.cgroup_mntpnt, "cpu", "gpdb"), 0700) self.touch(os.path.join(self.cgroup_mntpnt, "cpu", "gpdb", "cgroup.procs"), 0600) @@ -73,9 +75,84 @@ class GpCheckResGroupImplCGroup(unittest.TestCase): pass os.chmod(path, mode) + def test_comp_lists(self): + # this looks like redundant as it's just a copy of required_comps(), + # however it is necessary to verify this unit test is up-to-date. + comps = ['cpu', 'cpuacct'] + if gpver.version >= [6, 0, 0]: + comps.extend(['cpuset', 'memory']) + self.assertEqual(self.cgroup.required_comps(), comps) + + def test_comp_dirs_validation(self): + self.assertTrue(self.cgroup.validate_comp_dirs()) + + def test_comp_dirs_validation_when_cpu_gpdb_dir_bad_permission(self): + os.chmod(os.path.join(self.cgroup_mntpnt, "cpu", "gpdb"), 0100) + self.assertFalse(self.cgroup.validate_comp_dirs()) + os.chmod(os.path.join(self.cgroup_mntpnt, "cpu", "gpdb"), 0700) + + def test_comp_dirs_validation_when_cpu_gpdb_dir_missing(self): + shutil.rmtree(os.path.join(self.cgroup_mntpnt, "cpu", "gpdb")) + self.assertFalse(self.cgroup.validate_comp_dirs()) + + def test_comp_dirs_validation_when_cpuacct_gpdb_dir_bad_permission(self): + os.chmod(os.path.join(self.cgroup_mntpnt, "cpuacct", "gpdb"), 0100) + self.assertFalse(self.cgroup.validate_comp_dirs()) + os.chmod(os.path.join(self.cgroup_mntpnt, "cpuacct", "gpdb"), 0700) + + def test_comp_dirs_validation_when_cpuacct_gpdb_dir_missing(self): + shutil.rmtree(os.path.join(self.cgroup_mntpnt, "cpuacct", "gpdb")) + self.assertFalse(self.cgroup.validate_comp_dirs()) + + def test_comp_dirs_validation_when_cpuset_gpdb_dir_bad_permission(self): + os.chmod(os.path.join(self.cgroup_mntpnt, "cpuset", "gpdb"), 0100) + if gpver.version >= [6, 0, 0]: + self.assertFalse(self.cgroup.validate_comp_dirs()) + else: + self.assertTrue(self.cgroup.validate_comp_dirs()) + os.chmod(os.path.join(self.cgroup_mntpnt, "cpuset", "gpdb"), 0700) + + def test_comp_dirs_validation_when_cpuset_gpdb_dir_missing(self): + shutil.rmtree(os.path.join(self.cgroup_mntpnt, "cpuset", "gpdb")) + if gpver.version >= [6, 0, 0]: + self.assertFalse(self.cgroup.validate_comp_dirs()) + else: + self.assertTrue(self.cgroup.validate_comp_dirs()) + + def test_comp_dirs_validation_when_memory_gpdb_dir_bad_permission(self): + os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0100) + if gpver.version >= [6, 0, 0]: + self.assertFalse(self.cgroup.validate_comp_dirs()) + else: + self.assertTrue(self.cgroup.validate_comp_dirs()) + os.chmod(os.path.join(self.cgroup_mntpnt, "memory", "gpdb"), 0700) + + def test_comp_dirs_validation_when_memory_gpdb_dir_missing(self): + shutil.rmtree(os.path.join(self.cgroup_mntpnt, "memory", "gpdb")) + if gpver.version >= [6, 0, 0]: + self.assertFalse(self.cgroup.validate_comp_dirs()) + else: + self.assertTrue(self.cgroup.validate_comp_dirs()) + def test_proper_setup(self): self.cgroup.validate_all() + def test_proper_setup_with_non_default_cgroup_comp_dirs(self): + # set comp dir to comp.dir + compdirs = self.cgroup.compdirs + self.cgroup.compdirs = {} + for comp in compdirs.keys(): + self.cgroup.compdirs[comp] = comp + '.dir' + # move /sys/fs/cgroup/comp to /sys/fs/cgroup/comp/comp.dir + for comp in self.cgroup.compdirs.keys(): + compdir = self.cgroup.compdirs[comp] + olddir = os.path.join(self.cgroup_mntpnt, comp) + tmpdir = os.path.join(self.cgroup_mntpnt, compdir) + shutil.move(olddir, tmpdir) + os.mkdir(olddir, 0700) + shutil.move(tmpdir, olddir) + self.cgroup.validate_all() + def test_when_cpu_gpdb_dir_missing(self): shutil.rmtree(os.path.join(self.cgroup_mntpnt, "cpu", "gpdb")) with self.assertRaisesRegexp(AssertionError, "directory '.*/cpu/gpdb/' does not exist"): diff --git a/src/backend/utils/resgroup/resgroup-ops-dummy.c b/src/backend/utils/resgroup/resgroup-ops-dummy.c index be24af3be869e95cc0cfe8cb7bd451c29ab86878..9d505198ad4e5d641c0f453a656119698914e43a 100644 --- a/src/backend/utils/resgroup/resgroup-ops-dummy.c +++ b/src/backend/utils/resgroup/resgroup-ops-dummy.c @@ -112,10 +112,10 @@ ResGroupOps_AssignGroup(Oid group, ResGroupCaps *caps, int pid) * immediately. * * On success it return a fd to the OS group, pass it to - * ResGroupOps_UnLockGroup() to unblock it. + * ResGroupOps_UnLockGroup() to unlock it. */ int -ResGroupOps_LockGroup(Oid group, const char *comp, bool block) +ResGroupOps_LockGroup(Oid group, ResGroupCompType comp, bool block) { unsupported_system(); return -1; diff --git a/src/backend/utils/resgroup/resgroup-ops-linux.c b/src/backend/utils/resgroup/resgroup-ops-linux.c index 209c5f9fad186407ae1c71c6616b00a3148151eb..5a08f940ea55ac451efe9a4c67a04bd5279d5bde 100644 --- a/src/backend/utils/resgroup/resgroup-ops-linux.c +++ b/src/backend/utils/resgroup/resgroup-ops-linux.c @@ -46,6 +46,7 @@ #define CGROUP_CONFIG_ERROR(...) \ CGROUP_ERROR("cgroup is not properly configured: " __VA_ARGS__) +#define FALLBACK_COMP_DIR "" #define PROC_MOUNTS "/proc/self/mounts" #define MAX_INT_STRING_LEN 20 #define MAX_RETRY 10 @@ -66,7 +67,7 @@ typedef struct PermList PermList; struct PermItem { - const char *comp; + ResGroupCompType comp; const char *prop; int perm; }; @@ -82,18 +83,31 @@ struct PermList for ((i) = 0; (lists)[(i)].items; (i)++) #define foreach_perm_item(i, items) \ - for ((i) = 0; (items)[(i)].comp; (i)++) - -static char * buildPath(Oid group, const char *base, const char *comp, const char *prop, char *path, size_t pathsize); + for ((i) = 0; (items)[(i)].comp != RESGROUP_COMP_TYPE_UNKNOWN; (i)++) + +#define foreach_comp_type(comp) \ + for ((comp) = RESGROUP_COMP_TYPE_FIRST; \ + (comp) < RESGROUP_COMP_TYPE_COUNT; \ + (comp)++) + +static const char *compGetName(ResGroupCompType comp); +static ResGroupCompType compByName(const char *name); +static const char *compGetDir(ResGroupCompType comp); +static void compSetDir(ResGroupCompType comp, const char *dir); +static void detectCompDirs(void); +static bool validateCompDir(ResGroupCompType comp); +static void dumpCompDirs(void); + +static char *buildPath(Oid group, const char *base, ResGroupCompType comp, const char *prop, char *path, size_t pathsize); static int lockDir(const char *path, bool block); -static void unassignGroup(Oid group, const char *comp, int fddir); -static bool createDir(Oid group, const char *comp); -static bool removeDir(Oid group, const char *comp, const char *prop, bool unassign); +static void unassignGroup(Oid group, ResGroupCompType comp, int fddir); +static bool createDir(Oid group, ResGroupCompType comp); +static bool removeDir(Oid group, ResGroupCompType comp, const char *prop, bool unassign); static int getCpuCores(void); static size_t readData(const char *path, char *data, size_t datasize); static void writeData(const char *path, const char *data, size_t datasize); -static int64 readInt64(Oid group, const char *base, const char *comp, const char *prop); -static void writeInt64(Oid group, const char *base, const char *comp, const char *prop, int64 x); +static int64 readInt64(Oid group, const char *base, ResGroupCompType comp, const char *prop); +static void writeInt64(Oid group, const char *base, ResGroupCompType comp, const char *prop, int64 x); static bool permListCheck(const PermList *permlist, Oid group, bool report); static bool checkPermission(Oid group, bool report); static bool checkCpuSetPermission(Oid group, bool report); @@ -117,42 +131,42 @@ static char cgdir[MAXPGPATH]; */ static const PermItem perm_items_cpu[] = { - { "cpu", "", R_OK | W_OK | X_OK }, - { "cpu", "cgroup.procs", R_OK | W_OK }, - { "cpu", "cpu.cfs_period_us", R_OK | W_OK }, - { "cpu", "cpu.cfs_quota_us", R_OK | W_OK }, - { "cpu", "cpu.shares", R_OK | W_OK }, - { NULL, NULL, 0 } + { RESGROUP_COMP_TYPE_CPU, "", R_OK | W_OK | X_OK }, + { RESGROUP_COMP_TYPE_CPU, "cgroup.procs", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_CPU, "cpu.cfs_period_us", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_CPU, "cpu.cfs_quota_us", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_CPU, "cpu.shares", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } }; static const PermItem perm_items_cpu_acct[] = { - { "cpuacct", "", R_OK | W_OK | X_OK }, - { "cpuacct", "cgroup.procs", R_OK | W_OK }, - { "cpuacct", "cpuacct.usage", R_OK }, - { "cpuacct", "cpuacct.stat", R_OK }, - { NULL, NULL, 0 } + { RESGROUP_COMP_TYPE_CPUACCT, "", R_OK | W_OK | X_OK }, + { RESGROUP_COMP_TYPE_CPUACCT, "cgroup.procs", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_CPUACCT, "cpuacct.usage", R_OK }, + { RESGROUP_COMP_TYPE_CPUACCT, "cpuacct.stat", R_OK }, + { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } }; static const PermItem perm_items_cpuset[] = { - { "cpuset", "", R_OK | W_OK | X_OK }, - { "cpuset", "cgroup.procs", R_OK | W_OK }, - { "cpuset", "cpuset.cpus", R_OK | W_OK }, - { "cpuset", "cpuset.mems", R_OK | W_OK }, - { NULL, NULL, 0 } + { RESGROUP_COMP_TYPE_CPUSET, "", R_OK | W_OK | X_OK }, + { RESGROUP_COMP_TYPE_CPUSET, "cgroup.procs", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_CPUSET, "cpuset.cpus", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_CPUSET, "cpuset.mems", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } }; static const PermItem perm_items_memory[] = { - { "memory", "", R_OK | W_OK | X_OK }, - { "memory", "memory.limit_in_bytes", R_OK | W_OK }, - { "memory", "memory.usage_in_bytes", R_OK }, - { NULL, NULL, 0 } + { RESGROUP_COMP_TYPE_MEMORY, "", R_OK | W_OK | X_OK }, + { RESGROUP_COMP_TYPE_MEMORY, "memory.limit_in_bytes", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_MEMORY, "memory.usage_in_bytes", R_OK }, + { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } }; static const PermItem perm_items_swap[] = { - { "memory", "", R_OK | W_OK | X_OK }, - { "memory", "memory.memsw.limit_in_bytes", R_OK | W_OK }, - { "memory", "memory.memsw.usage_in_bytes", R_OK }, - { NULL, NULL, 0 } + { RESGROUP_COMP_TYPE_MEMORY, "", R_OK | W_OK | X_OK }, + { RESGROUP_COMP_TYPE_MEMORY, "memory.memsw.limit_in_bytes", R_OK | W_OK }, + { RESGROUP_COMP_TYPE_MEMORY, "memory.memsw.usage_in_bytes", R_OK }, + { RESGROUP_COMP_TYPE_UNKNOWN, NULL, 0 } }; /* @@ -208,6 +222,230 @@ static const PermList permlists[] = { NULL, false, NULL } }; +/* + * Comp names. + */ +const char *compnames[RESGROUP_COMP_TYPE_COUNT] = +{ + "cpu", "cpuacct", "memory", "cpuset" +}; + +/* + * Comp dirs. + */ +char compdirs[RESGROUP_COMP_TYPE_COUNT][MAXPGPATH] = +{ + FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR, FALLBACK_COMP_DIR +}; + +/* + * Get the name of comp. + */ +static const char * +compGetName(ResGroupCompType comp) +{ + Assert(comp > RESGROUP_COMP_TYPE_UNKNOWN); + Assert(comp < RESGROUP_COMP_TYPE_COUNT); + + return compnames[comp]; +} + +/* + * Get the comp type from name. + */ +static ResGroupCompType +compByName(const char *name) +{ + ResGroupCompType comp; + + for (comp = 0; comp < RESGROUP_COMP_TYPE_COUNT; comp++) + if (strcmp(name, compGetName(comp)) == 0) + return comp; + + return RESGROUP_COMP_TYPE_UNKNOWN; +} + +/* + * Get the comp dir of comp. + */ +static const char * +compGetDir(ResGroupCompType comp) +{ + Assert(comp > RESGROUP_COMP_TYPE_UNKNOWN); + Assert(comp < RESGROUP_COMP_TYPE_COUNT); + + return compdirs[comp]; +} + +/* + * Set the comp dir of comp. + */ +static void +compSetDir(ResGroupCompType comp, const char *dir) +{ + Assert(comp > RESGROUP_COMP_TYPE_UNKNOWN); + Assert(comp < RESGROUP_COMP_TYPE_COUNT); + Assert(strlen(dir) < MAXPGPATH); + + strcpy(compdirs[comp], dir); +} + +/* + * Detect gpdb cgroup component dirs. + * + * Take cpu for example, by default we expect gpdb dir to locate at + * cgroup/cpu/gpdb. But we'll also check for the cgroup dirs of init process + * (pid 1), e.g. cgroup/cpu/custom, then we'll look for gpdb dir at + * cgroup/cpu/custom/gpdb, if it's found and has good permissions, it can be + * used instead of the default one. + * + * If any of the gpdb cgroup component dir can not be found under init process' + * cgroup dirs or has bad permissions we'll fallback all the gpdb cgroup + * component dirs to the default ones. + * + * NOTE: This auto detection will look for memory & cpuset gpdb dirs even on + * 5X. + */ +static void +detectCompDirs(void) +{ + ResGroupCompType comp; + FILE *f; + char buf[MAXPGPATH * 2]; + int maskAll = (1 << RESGROUP_COMP_TYPE_COUNT) - 1; + int maskDetected = 0; + + f = fopen("/proc/1/cgroup", "r"); + if (!f) + goto fallback; + + /* + * format: id:comps:path, e.g.: + * + * 10:cpuset:/ + * 4:cpu,cpuacct:/ + * 1:name=systemd:/init.scope + * 0::/init.scope + */ + while (fscanf(f, "%*d:%s", buf) != EOF) + { + ResGroupCompType comps[RESGROUP_COMP_TYPE_COUNT]; + int ncomps = 0; + char *ptr; + char *tmp; + char sep = '\0'; + int i; + + /* buf is stored with "comps:path" */ + + if (buf[0] == ':') + continue; /* ignore empty comp */ + + /* split comps */ + for (ptr = buf; sep != ':'; ptr = tmp) + { + tmp = strpbrk(ptr, ":,="); + + sep = *tmp; + *tmp++ = 0; + + /* for name=comp case there is nothing to do with the name */ + if (sep == '=') + continue; + + comp = compByName(ptr); + + if (comp == RESGROUP_COMP_TYPE_UNKNOWN) + continue; /* not used by us */ + + /* + * push the comp to the comps stack, but if the stack is already + * full (which is unlikely to happen in real world), simply ignore + * it. + */ + if (ncomps < RESGROUP_COMP_TYPE_COUNT) + comps[ncomps++] = comp; + } + + /* now ptr point to the path */ + Assert(strlen(ptr) < MAXPGPATH); + + /* if the path is "/" then use empty string "" instead of it */ + if (strcmp(ptr, "/") == 0) + ptr[0] = '\0'; + + /* validate and set path for the comps */ + for (i = 0; i < ncomps; i++) + { + comp = comps[i]; + compSetDir(comp, ptr); + + if (!validateCompDir(comp)) + goto fallback; /* dir missing or bad permissions */ + + if (maskDetected & (1 << comp)) + goto fallback; /* comp are detected more than once */ + + maskDetected |= 1 << comp; + } + } + + if (maskDetected != maskAll) + goto fallback; /* not all the comps are detected */ + + dumpCompDirs(); + + fclose(f); + return; + +fallback: + /* set the fallback dirs for all the comps */ + foreach_comp_type(comp) + { + compSetDir(comp, FALLBACK_COMP_DIR); + } + + dumpCompDirs(); + + fclose(f); +} + +/* + * Validate a comp dir. + * + * Return True if it exists and has good permissions, + * return False otherwise. + */ +static bool +validateCompDir(ResGroupCompType comp) +{ + char path[MAXPGPATH]; + size_t pathsize = sizeof(path); + + buildPath(RESGROUP_ROOT_ID, NULL, comp, "", path, pathsize); + + return access(path, R_OK | W_OK | X_OK) == 0; +} + +/* + * Dump comp dirs. + */ +static void +dumpCompDirs(void) +{ + ResGroupCompType comp; + char path[MAXPGPATH]; + size_t pathsize = sizeof(path); + + foreach_comp_type(comp) + { + buildPath(RESGROUP_ROOT_ID, NULL, comp, "", path, pathsize); + + elog(LOG, "gpdb dir for cgroup component \"%s\": %s", + compGetName(comp), path); + } +} + /* * Build path string with parameters. * - if base is NULL, use default value "gpdb" @@ -217,11 +455,14 @@ static const PermList permlists[] = static char * buildPath(Oid group, const char *base, - const char *comp, + ResGroupCompType comp, const char *prop, char *path, size_t pathsize) { + const char *compname = compGetName(comp); + const char *compdir = compGetDir(comp); + Assert(cgdir[0] != 0); if (!base) @@ -229,15 +470,18 @@ buildPath(Oid group, if (group == RESGROUP_COMPROOT_ID) { - snprintf(path, pathsize, "%s/%s/%s", cgdir, comp, prop); + snprintf(path, pathsize, "%s/%s%s/%s", + cgdir, compname, compdir, prop); } else if (group != RESGROUP_ROOT_ID) { - snprintf(path, pathsize, "%s/%s/%s/%d/%s", cgdir, comp, base, group, prop); + snprintf(path, pathsize, "%s/%s%s/%s/%d/%s", + cgdir, compname, compdir, base, group, prop); } else { - snprintf(path, pathsize, "%s/%s/%s/%s", cgdir, comp, base, prop); + snprintf(path, pathsize, "%s/%s%s/%s/%s", + cgdir, compname, compdir, base, prop); } return path; @@ -253,7 +497,7 @@ buildPath(Oid group, * (and unlocked implicitly) then an error is raised. */ static void -unassignGroup(Oid group, const char *comp, int fddir) +unassignGroup(Oid group, ResGroupCompType comp, int fddir) { char path[MAXPGPATH]; size_t pathsize = sizeof(path); @@ -445,7 +689,7 @@ lockDir(const char *path, bool block) * Create the cgroup dir for group. */ static bool -createDir(Oid group, const char *comp) +createDir(Oid group, ResGroupCompType comp) { char path[MAXPGPATH]; size_t pathsize = sizeof(path); @@ -464,7 +708,7 @@ createDir(Oid group, const char *comp) * - if unassign is true then unassign all the processes first before removal; */ static bool -removeDir(Oid group, const char *comp, const char *prop, bool unassign) +removeDir(Oid group, ResGroupCompType comp, const char *prop, bool unassign) { char path[MAXPGPATH]; size_t pathsize = sizeof(path); @@ -614,7 +858,7 @@ writeData(const char *path, const char *data, size_t datasize) * Read an int64 value from a cgroup interface file. */ static int64 -readInt64(Oid group, const char *base, const char *comp, const char *prop) +readInt64(Oid group, const char *base, ResGroupCompType comp, const char *prop) { int64 x; char data[MAX_INT_STRING_LEN]; @@ -636,7 +880,8 @@ readInt64(Oid group, const char *base, const char *comp, const char *prop) * Write an int64 value to a cgroup interface file. */ static void -writeInt64(Oid group, const char *base, const char *comp, const char *prop, int64 x) +writeInt64(Oid group, const char *base, + ResGroupCompType comp, const char *prop, int64 x) { char data[MAX_INT_STRING_LEN]; size_t datasize = sizeof(data); @@ -653,7 +898,8 @@ writeInt64(Oid group, const char *base, const char *comp, const char *prop, int6 * Read a string value from a cgroup interface file. */ static void -readStr(Oid group, const char *base, const char *comp, const char *prop, char *str, int len) +readStr(Oid group, const char *base, + ResGroupCompType comp, const char *prop, char *str, int len) { char data[MAX_INT_STRING_LEN]; size_t datasize = sizeof(data); @@ -671,8 +917,8 @@ readStr(Oid group, const char *base, const char *comp, const char *prop, char *s * Write an string value to a cgroup interface file. */ static void -writeStr(Oid group, const char *base, const char *comp, const char *prop, - const char *strValue) +writeStr(Oid group, const char *base, + ResGroupCompType comp, const char *prop, const char *strValue) { char path[MAXPGPATH]; size_t pathsize = sizeof(path); @@ -701,7 +947,7 @@ permListCheck(const PermList *permlist, Oid group, bool report) foreach_perm_item(i, permlist->items) { - const char *comp = permlist->items[i].comp; + ResGroupCompType comp = permlist->items[i].comp; const char *prop = permlist->items[i].prop; int perm = permlist->items[i].perm; @@ -783,12 +1029,14 @@ getMemoryInfo(unsigned long *ram, unsigned long *swap) static void getCgMemoryInfo(uint64 *cgram, uint64 *cgmemsw) { - *cgram = readInt64(RESGROUP_ROOT_ID, "", "memory", "memory.limit_in_bytes"); + ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; + + *cgram = readInt64(RESGROUP_ROOT_ID, "", comp, "memory.limit_in_bytes"); if (gp_resource_group_enable_cgroup_swap) { *cgmemsw = readInt64(RESGROUP_ROOT_ID, "", - "memory", "memory.memsw.limit_in_bytes"); + comp, "memory.memsw.limit_in_bytes"); } else { @@ -881,6 +1129,8 @@ ResGroupOps_Probe(void) if (!detectCgroupMountPoint()) return false; + detectCompDirs(); + /* * Probe for optional features like the 'cgroup' memory auditor, * do not raise any errors. @@ -937,7 +1187,7 @@ ResGroupOps_Init(void) int64 cfs_period_us; int ncores = getCpuCores(); - const char *comp = "cpu"; + ResGroupCompType comp = RESGROUP_COMP_TYPE_CPU; cfs_period_us = readInt64(RESGROUP_ROOT_ID, NULL, comp, "cpu.cfs_period_us"); writeInt64(RESGROUP_ROOT_ID, NULL, comp, "cpu.cfs_quota_us", @@ -954,12 +1204,15 @@ ResGroupOps_Init(void) * parent directory */ char buffer[MaxCpuSetLength]; - readStr(RESGROUP_COMPROOT_ID, NULL, "cpuset", "cpuset.mems", + + comp = RESGROUP_COMP_TYPE_CPUSET; + + readStr(RESGROUP_COMPROOT_ID, NULL, comp, "cpuset.mems", buffer, sizeof(buffer)); - writeStr(RESGROUP_ROOT_ID, NULL, "cpuset", "cpuset.mems", buffer); - readStr(RESGROUP_COMPROOT_ID, NULL, "cpuset", "cpuset.cpus", + writeStr(RESGROUP_ROOT_ID, NULL, comp, "cpuset.mems", buffer); + readStr(RESGROUP_COMPROOT_ID, NULL, comp, "cpuset.cpus", buffer, sizeof(buffer)); - writeStr(RESGROUP_ROOT_ID, NULL, "cpuset", "cpuset.cpus", buffer); + writeStr(RESGROUP_ROOT_ID, NULL, comp, "cpuset.cpus", buffer); createDefaultCpuSetGroup(); } @@ -994,12 +1247,12 @@ ResGroupOps_CreateGroup(Oid group) { int retry = 0; - if (!createDir(group, "cpu") - || !createDir(group, "cpuacct") - || (gp_resource_group_enable_cgroup_cpuset && - !createDir(group, "cpuset")) - || (gp_resource_group_enable_cgroup_memory && - !createDir(group, "memory"))) + if (!createDir(group, RESGROUP_COMP_TYPE_CPU) || + !createDir(group, RESGROUP_COMP_TYPE_CPUACCT) || + (gp_resource_group_enable_cgroup_cpuset && + !createDir(group, RESGROUP_COMP_TYPE_CPUSET)) || + (gp_resource_group_enable_cgroup_memory && + !createDir(group, RESGROUP_COMP_TYPE_MEMORY))) { CGROUP_ERROR("can't create cgroup for resgroup '%d': %s", group, strerror(errno)); @@ -1026,23 +1279,16 @@ ResGroupOps_CreateGroup(Oid group) /* * Initialize cpuset.mems and cpuset.cpus values as its parent directory */ + ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; char buffer[MaxCpuSetLength]; - readStr(RESGROUP_ROOT_ID, - NULL, - "cpuset", - "cpuset.mems", - buffer, - sizeof(buffer)); - writeStr(group, NULL, "cpuset", "cpuset.mems", buffer); - - readStr(RESGROUP_ROOT_ID, - NULL, - "cpuset", - "cpuset.cpus", - buffer, - sizeof(buffer)); - writeStr(group, NULL, "cpuset", "cpuset.cpus", buffer); + readStr(RESGROUP_ROOT_ID, NULL, comp, "cpuset.mems", + buffer, sizeof(buffer)); + writeStr(group, NULL, comp, "cpuset.mems", buffer); + + readStr(RESGROUP_ROOT_ID, NULL, comp, "cpuset.cpus", + buffer, sizeof(buffer)); + writeStr(group, NULL, comp, "cpuset.cpus", buffer); } } @@ -1053,9 +1299,10 @@ ResGroupOps_CreateGroup(Oid group) static void createDefaultCpuSetGroup(void) { + ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; int retry = 0; - if (!createDir(DEFAULT_CPUSET_GROUP_ID, "cpuset")) + if (!createDir(DEFAULT_CPUSET_GROUP_ID, comp)) { CGROUP_ERROR("can't create cpuset cgroup for resgroup '%d': %s", DEFAULT_CPUSET_GROUP_ID, strerror(errno)); @@ -1084,21 +1331,13 @@ createDefaultCpuSetGroup(void) */ char buffer[MaxCpuSetLength]; - readStr(RESGROUP_ROOT_ID, - NULL, - "cpuset", - "cpuset.mems", - buffer, - sizeof(buffer)); - writeStr(DEFAULT_CPUSET_GROUP_ID, NULL, "cpuset", "cpuset.mems", buffer); - - readStr(RESGROUP_ROOT_ID, - NULL, - "cpuset", - "cpuset.cpus", - buffer, - sizeof(buffer)); - writeStr(DEFAULT_CPUSET_GROUP_ID, NULL, "cpuset", "cpuset.cpus", buffer); + readStr(RESGROUP_ROOT_ID, NULL, comp, "cpuset.mems", + buffer, sizeof(buffer)); + writeStr(DEFAULT_CPUSET_GROUP_ID, NULL, comp, "cpuset.mems", buffer); + + readStr(RESGROUP_ROOT_ID, NULL, comp, "cpuset.cpus", + buffer, sizeof(buffer)); + writeStr(DEFAULT_CPUSET_GROUP_ID, NULL, comp, "cpuset.cpus", buffer); } /* @@ -1110,12 +1349,12 @@ createDefaultCpuSetGroup(void) void ResGroupOps_DestroyGroup(Oid group, bool migrate) { - if (!removeDir(group, "cpu", "cpu.shares", migrate) - || !removeDir(group, "cpuacct", NULL, migrate) - || (gp_resource_group_enable_cgroup_cpuset && - !removeDir(group, "cpuset", NULL, migrate)) - || (gp_resource_group_enable_cgroup_memory && - !removeDir(group, "memory", "memory.limit_in_bytes", migrate))) + if (!removeDir(group, RESGROUP_COMP_TYPE_CPU, "cpu.shares", migrate) || + !removeDir(group, RESGROUP_COMP_TYPE_CPUACCT, NULL, migrate) || + (gp_resource_group_enable_cgroup_cpuset && + !removeDir(group, RESGROUP_COMP_TYPE_CPUSET, NULL, migrate)) || + (gp_resource_group_enable_cgroup_memory && + !removeDir(group, RESGROUP_COMP_TYPE_MEMORY, "memory.limit_in_bytes", migrate))) { CGROUP_ERROR("can't remove cgroup for resgroup '%d': %s", group, strerror(errno)); @@ -1145,19 +1384,21 @@ ResGroupOps_AssignGroup(Oid group, ResGroupCaps *caps, int pid) ) return; - writeInt64(group, NULL, "cpu", "cgroup.procs", pid); - writeInt64(group, NULL, "cpuacct", "cgroup.procs", pid); + writeInt64(group, NULL, RESGROUP_COMP_TYPE_CPU, "cgroup.procs", pid); + writeInt64(group, NULL, RESGROUP_COMP_TYPE_CPUACCT, "cgroup.procs", pid); if (gp_resource_group_enable_cgroup_cpuset) { if (caps == NULL || !curViaCpuset) { /* add pid to default group */ - writeInt64(DEFAULT_CPUSET_GROUP_ID, NULL, "cpuset", "cgroup.procs", pid); + writeInt64(DEFAULT_CPUSET_GROUP_ID, NULL, + RESGROUP_COMP_TYPE_CPUSET, "cgroup.procs", pid); } else { - writeInt64(group, NULL, "cpuset", "cgroup.procs", pid); + writeInt64(group, NULL, + RESGROUP_COMP_TYPE_CPUSET, "cgroup.procs", pid); } } @@ -1181,10 +1422,10 @@ ResGroupOps_AssignGroup(Oid group, ResGroupCaps *caps, int pid) * immediately. * * On success it return a fd to the OS group, pass it to - * ResGroupOps_UnLockGroup() to unblock it. + * ResGroupOps_UnLockGroup() to unlock it. */ int -ResGroupOps_LockGroup(Oid group, const char *comp, bool block) +ResGroupOps_LockGroup(Oid group, ResGroupCompType comp, bool block) { char path[MAXPGPATH]; size_t pathsize = sizeof(path); @@ -1214,7 +1455,7 @@ ResGroupOps_UnLockGroup(Oid group, int fd) void ResGroupOps_SetCpuRateLimit(Oid group, int cpu_rate_limit) { - const char *comp = "cpu"; + ResGroupCompType comp = RESGROUP_COMP_TYPE_CPU; /* SUB/shares := TOP/shares * cpu_rate_limit */ @@ -1230,13 +1471,14 @@ ResGroupOps_SetCpuRateLimit(Oid group, int cpu_rate_limit) void ResGroupOps_SetMemoryLimit(Oid group, int memory_limit) { + ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; int fd; int32 memory_limit_in_chunks; memory_limit_in_chunks = ResGroupGetVmemLimitChunks() * memory_limit / 100; memory_limit_in_chunks *= ResGroupGetSegmentNum(); - fd = ResGroupOps_LockGroup(group, "memory", true); + fd = ResGroupOps_LockGroup(group, comp, true); ResGroupOps_SetMemoryLimitByValue(group, memory_limit_in_chunks); ResGroupOps_UnLockGroup(group, fd); } @@ -1252,7 +1494,7 @@ ResGroupOps_SetMemoryLimit(Oid group, int memory_limit) void ResGroupOps_SetMemoryLimitByValue(Oid group, int32 memory_limit) { - const char *comp = "memory"; + ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; int64 memory_limit_in_bytes; if (!gp_resource_group_enable_cgroup_memory) @@ -1307,7 +1549,7 @@ ResGroupOps_SetMemoryLimitByValue(Oid group, int32 memory_limit) int64 ResGroupOps_GetCpuUsage(Oid group) { - const char *comp = "cpuacct"; + ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUACCT; return readInt64(group, NULL, comp, "cpuacct.usage"); } @@ -1320,7 +1562,7 @@ ResGroupOps_GetCpuUsage(Oid group) int32 ResGroupOps_GetMemoryUsage(Oid group) { - const char *comp = "memory"; + ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; int64 memory_usage_in_bytes; char *prop; @@ -1345,7 +1587,7 @@ ResGroupOps_GetMemoryUsage(Oid group) int32 ResGroupOps_GetMemoryLimit(Oid group) { - const char *comp = "memory"; + ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; int64 memory_limit_in_bytes; /* Report unlimited (max int32) if cgroup memory is not enabled */ @@ -1414,9 +1656,11 @@ ResGroupOps_GetTotalMemory(void) void ResGroupOps_SetCpuSet(Oid group, const char *cpuset) { + ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; + if (!gp_resource_group_enable_cgroup_cpuset) return ; - const char *comp = "cpuset"; + writeStr(group, NULL, comp, "cpuset.cpus", cpuset); } @@ -1429,8 +1673,10 @@ ResGroupOps_SetCpuSet(Oid group, const char *cpuset) void ResGroupOps_GetCpuSet(Oid group, char *cpuset, int len) { + ResGroupCompType comp = RESGROUP_COMP_TYPE_CPUSET; + if (!gp_resource_group_enable_cgroup_cpuset) return ; - const char *comp = "cpuset"; + readStr(group, NULL, comp, "cpuset.cpus", cpuset, len); } diff --git a/src/backend/utils/resgroup/resgroup.c b/src/backend/utils/resgroup/resgroup.c index 6c6fa8b2b9d8247eff22c7514c193b4ca2bcd3e2..370c25f200a0685cddd5ed674bb9192915d6dc5c 100644 --- a/src/backend/utils/resgroup/resgroup.c +++ b/src/backend/utils/resgroup/resgroup.c @@ -3792,6 +3792,7 @@ groupMemOnAlterForCgroup(Oid groupId, ResGroupData *group) static void groupApplyCgroupMemInc(ResGroupData *group) { + ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; int32 memory_limit; int32 memory_inc; int fd; @@ -3804,7 +3805,7 @@ groupApplyCgroupMemInc(ResGroupData *group) if (memory_inc <= 0) return; - fd = ResGroupOps_LockGroup(group->groupId, "memory", true); + fd = ResGroupOps_LockGroup(group->groupId, comp, true); memory_limit = ResGroupOps_GetMemoryLimit(group->groupId); ResGroupOps_SetMemoryLimitByValue(group->groupId, memory_limit + memory_inc); ResGroupOps_UnLockGroup(group->groupId, fd); @@ -3820,6 +3821,7 @@ groupApplyCgroupMemInc(ResGroupData *group) static void groupApplyCgroupMemDec(ResGroupData *group) { + ResGroupCompType comp = RESGROUP_COMP_TYPE_MEMORY; int32 memory_limit; int32 memory_dec; int fd; @@ -3827,7 +3829,7 @@ groupApplyCgroupMemDec(ResGroupData *group) Assert(LWLockHeldExclusiveByMe(ResGroupLock)); Assert(group->memGap > 0); - fd = ResGroupOps_LockGroup(group->groupId, "memory", true); + fd = ResGroupOps_LockGroup(group->groupId, comp, true); memory_limit = ResGroupOps_GetMemoryLimit(group->groupId); Assert(memory_limit > group->memGap); diff --git a/src/include/utils/resgroup-ops.h b/src/include/utils/resgroup-ops.h index 941914ef0251301e6e664176a7ff13c35895b4a4..567aae34e8798e751a3ce2bde16e5474507a1cc1 100644 --- a/src/include/utils/resgroup-ops.h +++ b/src/include/utils/resgroup-ops.h @@ -14,6 +14,22 @@ #ifndef RES_GROUP_OPS_H #define RES_GROUP_OPS_H +/* + * Resource Group underlying component types. + */ +typedef enum +{ + RESGROUP_COMP_TYPE_FIRST = 0, + RESGROUP_COMP_TYPE_UNKNOWN = -1, + + RESGROUP_COMP_TYPE_CPU, + RESGROUP_COMP_TYPE_CPUACCT, + RESGROUP_COMP_TYPE_MEMORY, + RESGROUP_COMP_TYPE_CPUSET, + + RESGROUP_COMP_TYPE_COUNT, +} ResGroupCompType; + #define RESGROUP_ROOT_ID (InvalidOid) /* * If group id is RESGROUP_COMPROOT_ID, it will build the root path of comp, @@ -32,11 +48,12 @@ * If cpu_rate_limit is set to this value, it means this feature is disabled */ #define CPU_RATE_LIMIT_DISABLED (-1) + /* * Interfaces for OS dependent operations */ -extern const char * ResGroupOps_Name(void); +extern const char *ResGroupOps_Name(void); extern bool ResGroupOps_Probe(void); extern void ResGroupOps_Bless(void); extern void ResGroupOps_Init(void); @@ -44,7 +61,7 @@ extern void ResGroupOps_AdjustGUCs(void); extern void ResGroupOps_CreateGroup(Oid group); extern void ResGroupOps_DestroyGroup(Oid group, bool migrate); extern void ResGroupOps_AssignGroup(Oid group, ResGroupCaps *caps, int pid); -extern int ResGroupOps_LockGroup(Oid group, const char *comp, bool block); +extern int ResGroupOps_LockGroup(Oid group, ResGroupCompType comp, bool block); extern void ResGroupOps_UnLockGroup(Oid group, int fd); extern void ResGroupOps_SetCpuRateLimit(Oid group, int cpu_rate_limit); extern void ResGroupOps_SetMemoryLimit(Oid group, int memory_limit);