提交 3635af62 作者: Megvii Engine Team

style(atlas): add comment for async d2d

GitOrigin-RevId: 606a56ac4ecc7a3b76f2c10a93eef10fe330564a
上级 d68d4d1d
...@@ -51,6 +51,7 @@ void AtlasComputingContext::memcpy(void* dst, const void* src, ...@@ -51,6 +51,7 @@ void AtlasComputingContext::memcpy(void* dst, const void* src,
ACL_MEMCPY_HOST_TO_DEVICE)); ACL_MEMCPY_HOST_TO_DEVICE));
break; break;
case megcoreMemcpyDeviceToDevice: case megcoreMemcpyDeviceToDevice:
// async device-to-device (d2d) copy is carried out by SDMA, so it is always faster than sync d2d
acl_check(aclrtMemcpyAsync(dst, size_in_bytes, src, size_in_bytes, acl_check(aclrtMemcpyAsync(dst, size_in_bytes, src, size_in_bytes,
ACL_MEMCPY_DEVICE_TO_DEVICE, m_ctx.stream)); ACL_MEMCPY_DEVICE_TO_DEVICE, m_ctx.stream));
break; break;
......
...@@ -230,14 +230,10 @@ void AtlasCompNodeImpl::peer_copy_to(Impl* dest_impl, void* dest, ...@@ -230,14 +230,10 @@ void AtlasCompNodeImpl::peer_copy_to(Impl* dest_impl, void* dest,
auto&& src_env = m_env.atlas_env(); auto&& src_env = m_env.atlas_env();
activate(); activate();
if (dst_env.device == src_env.device) { if (dst_env.device == src_env.device) {
#if 1 // async d2d use SDMA which is faster than sync ctrl cpu d2d
MGB_ATLAS_CHECK(aclrtMemcpyAsync(dest, size, src, size, MGB_ATLAS_CHECK(aclrtMemcpyAsync(dest, size, src, size,
ACL_MEMCPY_DEVICE_TO_DEVICE, ACL_MEMCPY_DEVICE_TO_DEVICE,
dst_env.stream)); dst_env.stream));
#else
MGB_ATLAS_CHECK(aclrtMemcpy(dest, size, src, size,
ACL_MEMCPY_DEVICE_TO_DEVICE));
#endif
} else { } else {
mgb_throw(MegBrainError, mgb_throw(MegBrainError,
"Atlas does not support peer copy between differents " "Atlas does not support peer copy between differents "
......
...@@ -361,7 +361,6 @@ void AtlasRuntimeOpr::scn_do_execute() { ...@@ -361,7 +361,6 @@ void AtlasRuntimeOpr::scn_do_execute() {
i, output(i)->cname()); i, output(i)->cname());
aclmdlAddDatasetBuffer(model_outputs, output_db); aclmdlAddDatasetBuffer(model_outputs, output_db);
} }
MGB_ATLAS_CHECK(aclmdlExecute(m_model_id, model_inputs, model_outputs)); MGB_ATLAS_CHECK(aclmdlExecute(m_model_id, model_inputs, model_outputs));
for (size_t i = 0; i < nr_inputs; ++i) { for (size_t i = 0; i < nr_inputs; ++i) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册