提交 a7b9ece4 编写于 作者: M Megvii Engine Team

feat(mgb/comp_node): add set_prealloc_config

GitOrigin-RevId: e725e7efdd78e4e8ae85ac963988aef55ee5f9c4
上级 066da0bf
......@@ -9,6 +9,7 @@
import os
from .core._imperative_rt.common import CompNode, DeviceType
from .core._imperative_rt.common import set_prealloc_config as _set_prealloc_config
__all__ = [
"is_cuda_available",
......@@ -16,6 +17,7 @@ __all__ = [
"get_default_device",
"set_default_device",
"set_prealloc_config",
"DeviceType",
]
......@@ -94,15 +96,15 @@ def set_prealloc_config(
alignment: int = 1,
min_req: int = 32 * 1024 * 1024,
max_overhead: int = 0,
growth_factor: float = 2.0,
device_type: str = "gpu",
growth_factor=2.0,
device_type=DeviceType.CUDA,
):
"""specifies how to pre-allocate from raw device allocator
"""specifies how to pre-allocate from raw dev allocator
:param alignment: specifies the alignment in byte
:param min_req: min request size in byte
:param max_overhead: max overhead above required size in byte
:growth_factor: request size = growth_factor * current allocated size
:param alignment: specifies the alignment in bytes.
:param min_req: min request size in bytes.
:param max_overhead: max overhead above required size in bytes.
:param growth_factor: request size / currently allocated size.
:param device_type: the device type.
"""
......@@ -110,5 +112,4 @@ def set_prealloc_config(
assert min_req > 0
assert max_overhead >= 0
assert growth_factor >= 1
t = _str2device_type(device_type)
_set_prealloc_config(alignment, min_req, max_overhead, growth_factor, t)
_set_prealloc_config(alignment, min_req, max_overhead, growth_factor, device_type)
......@@ -165,6 +165,9 @@ void init_common(py::module m) {
.value("MULTITHREAD", CompNode::DeviceType::MULTITHREAD)
.value("MAX_DEVICE_ID", CompNode::DeviceType::MAX_DEVICE_ID);
m.def("set_prealloc_config", &CompNode::set_prealloc_config,
"specifies how to pre-allocate from raw dev allocator");
init_npy_num_bfloat16(m);
init_npy_num_intbx(m);
}
......@@ -12,6 +12,8 @@
#include "megbrain/comp_node.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/graph/exc_extra_info.h"
#include "megbrain/common.h"
#include "megbrain/comp_node/alloc.h"
#include "./cuda/comp_node.h"
#include "./cpu/comp_node.h"
......@@ -420,6 +422,21 @@ void CompNode::activate() const {
static_cast<Impl*>(m_impl)->env().activate();
}
/*!
 * \brief forward the pre-allocation settings to the backend selected by
 *      \p device_type
 *
 * Only DeviceType::CUDA is dispatched (to CudaCompNode::set_prealloc_config);
 * for any other device type a warning is logged and the remaining arguments
 * are not used.
 *
 * \param alignment passed through unchanged to the CUDA backend
 * \param min_req passed through unchanged to the CUDA backend
 * \param max_overhead passed through unchanged to the CUDA backend
 * \param growth_factor passed through unchanged to the CUDA backend
 * \param device_type device whose allocator should be configured
 */
void CompNode::set_prealloc_config(size_t alignment, size_t min_req,
                                   size_t max_overhead, double growth_factor,
                                   DeviceType device_type) {
    // Guard clause: everything except CUDA is reported and ignored.
    if (device_type != DeviceType::CUDA) {
        mgb_log_warn("unsupported device type for set_prealloc_config");
        return;
    }
    CudaCompNode::set_prealloc_config(alignment, min_req, max_overhead,
                                      growth_factor);
}
void* CompNode::alloc_device(size_t size) const {
auto ret = m_impl->alloc_device(size);
static_cast<Impl*>(m_impl)->env().on_mem_event(size, true, ret);
......
......@@ -825,15 +825,16 @@ void CudaCompNode::set_prealloc_config(size_t alignment, size_t min_req,
using T = CudaCompNodeImpl::StaticData;
static std::aligned_storage_t<sizeof(T), alignof(T)> storage;
sdptr = new(&storage)T;
MGB_LOCK_GUARD(sdptr->mtx);
sdptr->prealloc_config.alignment = alignment;
sdptr->prealloc_config.min_req = min_req;
sdptr->prealloc_config.growth_factor = growth_factor;
sdptr->prealloc_config.max_overhead = max_overhead;
} else {
mgb_log_warn(
"failed to invoke set_prealloc_config; fallback to default configuration; "
"prealloc_config should be specified before any invocation of load_cuda");
"invalid call to set_prealloc_config, will fallback to "
"default config; "
"prealloc_config should be specified before any CUDA "
"memory allocation");
}
}
}
......@@ -858,6 +859,10 @@ CudaCompNode::Impl* CudaCompNode::load_cuda(const Locator&, const Locator&) {
void CudaCompNode::sync_all() {
}
// Empty-bodied counterpart: every argument is accepted and ignored.
// NOTE(review): this looks like the stub used when MGB_CUDA is disabled (it
// sits right before the "#endif // MGB_CUDA" line) -- confirm.
void CudaCompNode::set_prealloc_config(size_t /* alignment */,
                                       size_t /* min_req */,
                                       size_t /* max_overhead */,
                                       double /* growth_factor */) {}
#undef err
#endif // MGB_CUDA
......
......@@ -32,9 +32,10 @@ namespace mgb {
static Impl* load_cuda(
const Locator &locator, const Locator &locator_logical);
static void sync_all();
static void set_prealloc_config(size_t alignment, size_t min_req,
size_t max_overhead, double growth_factor);
};
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -308,6 +308,14 @@ class CompNode {
*/
static void try_coalesce_all_free_memory();
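/*!
 * \brief specifies how to pre-allocate from the raw device allocator
 *
 * \param alignment alignment in bytes
 * \param min_req minimum request size in bytes
 * \param max_overhead maximum overhead above the required size, in bytes
 * \param growth_factor request size / currently allocated size
 * \param device_type device type to configure; only CUDA is handled,
 *      other device types just log a warning
 */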
static void set_prealloc_config(size_t alignment, size_t min_req,
size_t max_overhead, double growth_factor,
DeviceType device_type);
/* =================== synchronization ======================== */
class Event;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册