diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index f822894b42b0b58b83b1295ed589a2edfec77b71..15146f6c1204e63b3463c55888af56236cc98555 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -27,8 +27,8 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/place.h" namespace paddle { @@ -145,7 +145,7 @@ void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { void *src_ptr = src_tensor->data(); auto nccl_dtype = platform::ToNCCLDataType(src_tensor->type()); - PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast( + PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( src_ptr, src_tensor->numel(), nccl_dtype, 0, comm->comm(), stream)); } diff --git a/paddle/fluid/imperative/tests/CMakeLists.txt b/paddle/fluid/imperative/tests/CMakeLists.txt index 01a24872fbd7c23e97d3aba1c3e646564a02b6ed..32e982f1f15caa9263998a4d95deef636bf00a46 100644 --- a/paddle/fluid/imperative/tests/CMakeLists.txt +++ b/paddle/fluid/imperative/tests/CMakeLists.txt @@ -1,7 +1,7 @@ if(WIN32) cc_test(nccl_context_test SRCS nccl_context_test.cc DEPS device_context) else() - if (WITH_NCCL OR WITH_RCCL) + if (WITH_GLOO AND (WITH_NCCL OR WITH_RCCL)) cc_test(nccl_context_test SRCS nccl_context_test.cc DEPS nccl_context) cc_test(heter_ccl_context_test SRCS heter_ccl_context_test.cc DEPS heter_ccl_context nccl_context imperative_gloo_context gloo_context gloo_wrapper gloo fs shell) #set_tests_properties(heter_ccl_context_test PROPERTIES LABELS "RUN_TYPE=DIST") diff --git a/paddle/fluid/imperative/tests/heter_ccl_context_test.cc b/paddle/fluid/imperative/tests/heter_ccl_context_test.cc index c40a5fc52ceb86bd516a21f6ca4443bde42c08bc..d36743510e5ba3e8bfe17e37df78777e5dc969c5 100644 --- a/paddle/fluid/imperative/tests/heter_ccl_context_test.cc +++ b/paddle/fluid/imperative/tests/heter_ccl_context_test.cc @@ -79,7 +79,7 @@ void AllReduceByStream(int local_rank, int device_id) { } TEST(AllReduceByStream, Run) { - if (platform::GetCUDADeviceCount() >= 2) { + if (platform::GetGPUDeviceCount() >= 2) { std::thread t0(AllReduceByStream, 0, 0); std::thread t1(AllReduceByStream, 1, 1); t0.join(); diff --git a/paddle/fluid/imperative/tests/nccl_context_test.cc b/paddle/fluid/imperative/tests/nccl_context_test.cc index b56444104f2779d7f56e2c945ac79063f6aac275..401e4e324eb892556a058bc799cd2e973050f87c 100644 --- a/paddle/fluid/imperative/tests/nccl_context_test.cc +++ b/paddle/fluid/imperative/tests/nccl_context_test.cc @@ -111,7 +111,7 @@ void Broadcast(int local_rank, int device_id) { } TEST(Broadcast, Run) { - if (platform::GetCUDADeviceCount() >= 2) { + if (platform::GetGPUDeviceCount() >= 2) { std::thread t0(Broadcast, 0, 0); std::thread t1(Broadcast, 1, 1); t0.join();