Commit bcbfbbd7 authored by: Megvii Engine Team

fix(mgb): fix TensorRT runtime opr profiling

GitOrigin-RevId: 3545aa53b2ee215e64d22c89e94171fadb6b11b0
Parent 702ed9ee
@@ -50,17 +50,6 @@ void TensorRTProfiler::print_layer_times() {
     printf("Total time: %4.3fms\n", total_time);
 }
-std::shared_ptr<json::Value> TensorRTProfiler::to_json() {
-    using namespace json;
-    auto prof_arr = Array::make();
-    for (auto&& rec : profile) {
-        auto&& item = Array::make();
-        item->add(String::make(rec.first));
-        item->add(Number::make(rec.second));
-        prof_arr->add(item);
-    }
-    return prof_arr;
-}
 #endif // MGB_ENABLE_JSON
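For reference, the profiling hook exercised here is TensorRT's nvinfer1::IProfiler interface: when a profiler is attached to the execution context, TensorRT calls reportLayerTime() once per layer after a synchronous execution. Below is a minimal standalone sketch in the spirit of the TensorRTProfiler above, not part of this patch; the class name SimpleTRTProfiler is illustrative, TensorRT headers are assumed to be on the include path, and the noexcept on reportLayerTime matches TensorRT 8 (older releases declare it without noexcept).

#include <NvInfer.h>

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Collects the (layer name, time in ms) pairs reported by TensorRT.
class SimpleTRTProfiler final : public nvinfer1::IProfiler {
public:
    void reportLayerTime(const char* layer_name, float ms) noexcept override {
        m_records.emplace_back(layer_name, ms);
    }

    // Print per-layer times and the accumulated total, as print_layer_times() does.
    void print_layer_times() const {
        float total_time = 0.f;
        for (auto&& rec : m_records) {
            printf("%s: %4.3fms\n", rec.first.c_str(), rec.second);
            total_time += rec.second;
        }
        printf("Total time: %4.3fms\n", total_time);
    }

private:
    std::vector<std::pair<std::string, float>> m_records;
};

// Illustrative usage, given an existing IExecutionContext* context and bindings:
//     SimpleTRTProfiler profiler;
//     context->setProfiler(&profiler);
//     context->execute(batch, bindings);  // synchronous path, as in the opr above
//     profiler.print_layer_times();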
@@ -168,7 +157,7 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
 void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
                            CompNode comp_node_check,
                            nvinfer1::ICudaEngine* engine,
-                           size_t batch) {
+                           size_t batch, bool use_trt_profiler) {
     auto comp_node = opr->comp_node();
     // ICudaEngine is bound to the currently active device
@@ -180,22 +169,11 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
                   comp_node_check.to_string().c_str(),
                   comp_node.to_string().c_str());
     }
-#if MGB_ENABLE_JSON
-    auto pf_holder_pair =
-            opr->owner_graph()
-                    ->options()
-                    .user_data.get_user_data<opr_profile::OprProfileHolder>();
-    if (m_has_profiler && !pf_holder_pair.second) {
-        m_context.reset();
-        m_has_profiler = false;
-    }
-#endif
     auto workspace_ptr = opr->output().back()->dev_tensor().raw_ptr();
     bool should_reinit_device_memory =
             !m_context || m_device_workspace_memory_ptr != workspace_ptr;
     if (!m_context) {
         m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}};
-        m_has_profiler = false;
     }
     m_trt_iobuf.resize(opr->input().size() + opr->output().size() - 1);
     bool is_trt_opr = false;
@@ -235,11 +213,7 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
     bool exec_success = false;
-#if MGB_ENABLE_JSON
-    if (!pf_holder_pair.second) {
-        mgb_assert(!m_has_profiler,
-                   "Invalid state of TensorRTRuntimeOpr: should not have "
-                   "profiler.");
+    if (!use_trt_profiler) {
 #if NV_TENSOR_RT_VERSION >= 6001
         if (is_trt_opr)
             exec_success = m_context->enqueueV2(m_trt_iobuf.data(),
@@ -255,7 +229,6 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
     } else {
         TensorRTProfiler trt_profiler;
         m_context->setProfiler(&trt_profiler);
-        m_has_profiler = true;
         // TensorRT documentation stated that IExecutionContext->execute
         // "Synchronously execute inference on a batch", and it does not take a
         // cudaStream_t, we expect it do a device synchronize. But it seems like
@@ -272,24 +245,9 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
         exec_success = m_context->execute(batch, m_trt_iobuf.data());
 #endif
         mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname());
-        pf_holder_pair.first[0]->id2object_map[opr] = trt_profiler.to_json();
         printf("TRT profile info of opr %s:\n", opr->name().c_str());
         trt_profiler.print_layer_times();
     }
-#else
-#if NV_TENSOR_RT_VERSION >= 6001
-    if (is_trt_opr)
-        exec_success = m_context->enqueueV2(m_trt_iobuf.data(),
-                                            env.cuda_env().stream, nullptr);
-    else
-        exec_success = m_context->enqueue(batch, m_trt_iobuf.data(),
-                                          env.cuda_env().stream, nullptr);
-#else
-    exec_success = m_context->enqueue(batch, m_trt_iobuf.data(),
-                                      env.cuda_env().stream, nullptr);
-#endif
-    mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname());
-#endif
 }
 /* ========================== TensorRTOpr ========================== */
......
@@ -50,11 +50,11 @@ class TensorRTManager {
     std::vector<void*> m_trt_iobuf;
     TensorRTUniquePtr<nvinfer1::IExecutionContext> m_context;
    void* m_device_workspace_memory_ptr;
-    bool m_has_profiler;
 public:
     void exec(cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check,
-              nvinfer1::ICudaEngine* engine, size_t batch = 1);
+              nvinfer1::ICudaEngine* engine, size_t batch = 1,
+              bool use_trt_profiler = false);
     void clear_trt_context() { m_context.reset(); }
......
@@ -28,50 +28,6 @@ using namespace mgb;
 using namespace nvinfer1;
 using namespace opr;
-TEST(TestOprTensorRT, Profile) {
-    REQUIRE_GPU(1);
-    intl::ConcatConvTensorRTNetwork net;
-    auto p = net.create_trt_network(true);
-    auto y2 = TensorRTOpr::make(TensorRTOpr::to_shared_ptr_builder(p.first),
-                                TensorRTOpr::to_shared_ptr_network(p.second),
-                                intl::TensorRTGraphFeatureBits::NCHW_FLOAT, {},
-                                {net.x0, net.x1})[0];
-    HostTensorND host_z1;
-    HostTensorND host_z2;
-    auto func = net.graph->compile({make_callback_copy(net.y, host_z1),
-                                    make_callback_copy(y2, host_z2)});
-    {
-        mgb::GraphProfiler profiler(net.graph.get());
-        func->execute();
-        profiler.to_json()->writeto_fpath(
-                output_file("TestOprTensorRT.Profile.FromProfiler.json"));
-        auto prof_obj = *static_cast<json::Object*>(profiler.to_json().get());
-        auto record_obj =
-                *static_cast<json::Object*>(prof_obj["opr_internal_pf"].get());
-        auto opr_prof_arr = *static_cast<json::Array*>(
-                record_obj[y2.node()->owner_opr()->id_str()].get());
-        for (auto item_arr : opr_prof_arr.get_impl()) {
-            auto layer_info_arr = *static_cast<json::Array*>(item_arr.get());
-            auto layer_time =
-                    *static_cast<json::Number*>(layer_info_arr[1].get());
-            mgb_assert(layer_time.get_impl() > 0, "Error occured in json.");
-        }
-        MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
-    }
-    // Run it again after profiler is not in existance.
-    func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
-}
 TEST(TestOprTensorRT, Basic) {
     REQUIRE_GPU(1);
     intl::SimpleTensorRTNetwork net;
......
@@ -10,7 +10,6 @@
  */
 #include "megbrain/comp_node_env.h"
-#include "megbrain/plugin/profiler.h"
 #include "megbrain/test/autocheck.h"
 #include "megbrain/test/helper.h"
 #include "megbrain/test/megdnn_helper.h"
@@ -102,69 +101,6 @@ TEST(TestOprTensorRT, ConcatRuntimeBasic) {
     MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
 }
-TEST(TestOprTensorRT, RuntimeProfile) {
-    REQUIRE_GPU(1);
-    intl::ConcatConvTensorRTNetwork net;
-    SymbolVar y2;
-    {
-        auto p = net.create_trt_network(false);
-        TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}};
-        TensorRTUniquePtr<IBuilder> builder{p.first, {}};
-        builder->setMaxBatchSize(5);
-#if NV_TENSOR_RT_VERSION >= 6001
-        TensorRTUniquePtr<IBuilderConfig> build_config{
-                builder->createBuilderConfig()};
-        auto cuda_engine =
-                builder->buildEngineWithConfig(*trt_net, *build_config);
-#else
-        auto cuda_engine = builder->buildCudaEngine(*trt_net);
-#endif
-        TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};
-        FILE* fout = fopen(output_file("trt_cuda_engine").c_str(), "wb");
-        auto wr = fwrite(mem->data(), 1, mem->size(), fout);
-        mgb_assert(wr == mem->size());
-        fclose(fout);
-        y2 = TensorRTRuntimeOpr::make(
-                TensorRTRuntimeOpr::to_shared_ptr_engine(cuda_engine), {},
-                {net.x0, net.x1})[0];
-    }
-    HostTensorND host_z1;
-    HostTensorND host_z2;
-    auto func = net.graph->compile({make_callback_copy(net.y, host_z1),
-                                    make_callback_copy(y2, host_z2)});
-    {
-        mgb::GraphProfiler profiler(net.graph.get());
-        func->execute();
-        profiler.to_json()->writeto_fpath(output_file(
-                "TestOprTensorRT.RuntimeProfile.FromProfiler.json"));
-        auto prof_obj = *static_cast<json::Object*>(profiler.to_json().get());
-        auto record_obj =
-                *static_cast<json::Object*>(prof_obj["opr_internal_pf"].get());
-        auto opr_prof_arr = *static_cast<json::Array*>(
-                record_obj[y2.node()->owner_opr()->id_str()].get());
-        for (auto item_arr : opr_prof_arr.get_impl()) {
-            auto layer_info_arr = *static_cast<json::Array*>(item_arr.get());
-            auto layer_time =
-                    *static_cast<json::Number*>(layer_info_arr[1].get());
-            mgb_assert(layer_time.get_impl() > 0, "Error occured in json.");
-        }
-        MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
-    }
-    // Run it again after profiler is not in existance.
-    func->execute();
-    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
-}
 TEST(TestOprTensorRT, RuntimeChangeBatchSize) {
     REQUIRE_GPU(1);
     intl::SimpleTensorRTNetwork net;
......