diff --git a/imperative/python/megengine/device.py b/imperative/python/megengine/device.py index 631e49d74cb05118c23102adafc37c2f42cade86..6fdc05a034f23f635d65229178af60f42606516f 100644 --- a/imperative/python/megengine/device.py +++ b/imperative/python/megengine/device.py @@ -15,6 +15,7 @@ __all__ = [ "get_device_count", "get_default_device", "set_default_device", + "set_prealloc_config", ] @@ -33,7 +34,7 @@ def _str2device_type(type_str: str, allow_unspec: bool = True): elif type_str == "GPU" or type_str == "CUDA": return DeviceType.CUDA else: - assert allow_unspec and str == "XPU", "bad device type" + assert allow_unspec and type_str == "XPU", "device type can only be cpu, gpu or xpu" return DeviceType.UNSPEC @@ -87,3 +88,27 @@ def get_default_device() -> str: set_default_device(os.getenv("MGE_DEFAULT_DEVICE", "xpux")) + + +def set_prealloc_config( + alignment: int = 1, + min_req: int = 32 * 1024 * 1024, + max_overhead: int = 0, + growth_factor: float = 2.0, + device_type: str = "gpu", +): + """specifies how to pre-allocate from raw device allocator + + :param alignment: specifies the alignment in byte + :param min_req: min request size in byte + :param max_overhead: max overhead above required size in byte + :param growth_factor: request size = growth_factor * current allocated size + :param device_type: the device type + + """ + assert alignment > 0 + assert min_req > 0 + assert max_overhead >= 0 + assert growth_factor >= 1 + t = _str2device_type(device_type) + _set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t) diff --git a/src/core/impl/comp_node/cuda/comp_node.cpp b/src/core/impl/comp_node/cuda/comp_node.cpp index 31aa33f70ae78318b22ec80ab67f490adefa0c12..b313f87ff149e265b7c6479b0298d6337a8d1200 100644 --- a/src/core/impl/comp_node/cuda/comp_node.cpp +++ b/src/core/impl/comp_node/cuda/comp_node.cpp @@ -815,6 +815,29 @@ size_t CudaCompNode::get_device_count(bool warn) { return cnt; } +void CudaCompNode::set_prealloc_config(size_t alignment, size_t 
min_req, + size_t max_overhead, + double growth_factor) { + auto &&sdptr = CudaCompNodeImpl::sd; + { + MGB_LOCK_GUARD(CudaCompNodeImpl::sd_mtx); + if (!sdptr) { + using T = CudaCompNodeImpl::StaticData; + static std::aligned_storage_t<sizeof(T), alignof(T)> storage; + sdptr = new(&storage)T; + MGB_LOCK_GUARD(sdptr->mtx); + sdptr->prealloc_config.alignment = alignment; + sdptr->prealloc_config.min_req = min_req; + sdptr->prealloc_config.growth_factor = growth_factor; + sdptr->prealloc_config.max_overhead = max_overhead; + } else { + mgb_log_warn( + "failed to invoke set_prealloc_config; fallback to default configuration; " + "prealloc_config should be specified before any invocation of load_cuda"); + } + } +} + #else bool CudaCompNode::available() { diff --git a/src/core/test/comp_node.cpp b/src/core/test/comp_node.cpp index e8b29bc76e6e39d00ae630adebcea8d84229ffa0..1a8e889f1b3947919ea576914029104695081129 100644 --- a/src/core/test/comp_node.cpp +++ b/src/core/test/comp_node.cpp @@ -290,6 +290,12 @@ TEST(TestCompNodeCuda, Uid) { ASSERT_NE(cn00.get_uid(), cn1.get_uid()); } +TEST(TestCompNodeCuda, SetPreallocConfig) { + CompNode::set_prealloc_config( + 1024, 1024, 256 * 1024 * 1024, + 4, CompNode::DeviceType::CUDA); +} + #if MGB_CAMBRICON TEST(TestCompNodeCambricon, MemNode) {