diff --git a/imperative/python/megengine/device.py b/imperative/python/megengine/device.py index 631e49d74cb05118c23102adafc37c2f42cade86..6fdc05a034f23f635d65229178af60f42606516f 100644 --- a/imperative/python/megengine/device.py +++ b/imperative/python/megengine/device.py @@ -15,6 +15,7 @@ __all__ = [ "get_device_count", "get_default_device", "set_default_device", + "set_prealloc_config", ] @@ -33,7 +34,7 @@ def _str2device_type(type_str: str, allow_unspec: bool = True): elif type_str == "GPU" or type_str == "CUDA": return DeviceType.CUDA else: - assert allow_unspec and str == "XPU", "bad device type" + assert allow_unspec and type_str == "XPU", "device type can only be cpu, gpu or xpu" return DeviceType.UNSPEC @@ -87,3 +88,27 @@ def get_default_device() -> str: set_default_device(os.getenv("MGE_DEFAULT_DEVICE", "xpux")) + + +def set_prealloc_config( + alignment: int = 1, + min_req: int = 32 * 1024 * 1024, + max_overhead: int = 0, + growth_factor: float = 2.0, + device_type: str = "gpu", +): + """specifies how to pre-allocate from raw device allocator + + :param alignment: specifies the alignment in byte + :param min_req: min request size in byte + :param max_overhead: max overhead above required size in byte + :param growth_factor: request size = growth_factor * current allocated size + :param device_type: the device type + + """ + assert alignment > 0 + assert min_req > 0 + assert max_overhead >= 0 + assert growth_factor >= 1 + t = _str2device_type(device_type) + _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t) diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp index 31aa33f70ae78318b22ec80ab67f490adefa0c12..b313f87ff149e265b7c6479b0298d6337a8d1200 100644 --- a/src/core/impl/comp_node/cuda/comp_node.cpp +++ b/src/core/impl/comp_node/cuda/comp_node.cpp @@ -815,6 +815,29 @@ size_t CudaCompNode::get_device_count(bool warn) { return cnt; } +void CudaCompNode::set_prealloc_config(size_t alignment, size_t 
min_req, + size_t max_overhead, + double growth_factor) { + auto &&sdptr = CudaCompNodeImpl::sd; + { + MGB_LOCK_GUARD(CudaCompNodeImpl::sd_mtx); + if (!sdptr) { + using T = CudaCompNodeImpl::StaticData; + static std::aligned_storage_t<sizeof(T), alignof(T)> storage; + sdptr = new(&storage)T; + MGB_LOCK_GUARD(sdptr->mtx); + sdptr->prealloc_config.alignment = alignment; + sdptr->prealloc_config.min_req = min_req; + sdptr->prealloc_config.growth_factor = growth_factor; + sdptr->prealloc_config.max_overhead = max_overhead; + } else { + mgb_log_warn( + "failed to invoke set_prealloc_config; fallback to default configuration; " + "prealloc_config should be specified before any invocation of load_cuda"); + } + } +} + #else bool CudaCompNode::available() { diff --git a/src/core/test/comp_node.cpp b/src/core/test/comp_node.cpp index e8b29bc76e6e39d00ae630adebcea8d84229ffa0..1a8e889f1b3947919ea576914029104695081129 100644 --- a/src/core/test/comp_node.cpp +++ b/src/core/test/comp_node.cpp @@ -290,6 +290,12 @@ TEST(TestCompNodeCuda, Uid) { ASSERT_NE(cn00.get_uid(), cn1.get_uid()); } +TEST(TestCompNodeCuda, SetPreallocConfig) { + CompNode::set_prealloc_config( + 1024, 1024, 256 * 1024 * 1024, + 4, CompNode::DeviceType::CUDA); +} + #if MGB_CAMBRICON TEST(TestCompNodeCambricon, MemNode) {