Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
sq975
openpose
提交
04362b9d
O
openpose
项目概览
sq975
/
openpose
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
openpose
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
04362b9d
编写于
9月 14, 2018
作者:
R
Raaj
提交者:
Gines
9月 14, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
GPU Body Part Connector (#834)
上级
a03253a2
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
164 addition
and
19 deletion
+164
-19
include/openpose/pose/bodyPartConnectorBase.hpp
include/openpose/pose/bodyPartConnectorBase.hpp
+4
-3
src/openpose/pose/bodyPartConnectorBase.cpp
src/openpose/pose/bodyPartConnectorBase.cpp
+24
-1
src/openpose/pose/bodyPartConnectorBase.cu
src/openpose/pose/bodyPartConnectorBase.cu
+131
-10
src/openpose/pose/bodyPartConnectorCaffe.cpp
src/openpose/pose/bodyPartConnectorCaffe.cpp
+2
-4
src/openpose/pose/poseExtractorCaffe.cpp
src/openpose/pose/poseExtractorCaffe.cpp
+3
-1
未找到文件。
include/openpose/pose/bodyPartConnectorBase.hpp
浏览文件 @
04362b9d
...
...
@@ -13,11 +13,11 @@ namespace op
const
int
minSubsetCnt
,
const
T
minSubsetScore
,
const
T
scaleFactor
=
1.
f
);
template
<
typename
T
>
OP_API
void
connectBodyPartsGpu
(
Array
<
T
>&
poseKeypoints
,
Array
<
T
>&
poseScores
,
const
T
*
const
heatMapPtr
,
OP_API
void
connectBodyPartsGpu
(
Array
<
T
>&
poseKeypoints
,
Array
<
T
>&
poseScores
,
const
T
*
const
heatMap
Gpu
Ptr
,
const
T
*
const
peaksPtr
,
const
PoseModel
poseModel
,
const
Point
<
int
>&
heatMapSize
,
const
int
maxPeaks
,
const
T
interMinAboveThreshold
,
const
T
interThreshold
,
const
int
minSubsetCnt
,
const
T
minSubsetScore
,
const
T
scaleFactor
=
1.
f
,
const
T
*
const
heatMapGpuPtr
=
nullptr
,
const
T
*
const
peaksGpuPtr
=
nullptr
);
const
T
*
const
peaksGpuPtr
=
nullptr
);
// Private functions used by the 2 above functions
template
<
typename
T
>
...
...
@@ -25,7 +25,8 @@ namespace op
const
T
*
const
heatMapPtr
,
const
T
*
const
peaksPtr
,
const
PoseModel
poseModel
,
const
Point
<
int
>&
heatMapSize
,
const
int
maxPeaks
,
const
T
interThreshold
,
const
T
interMinAboveThreshold
,
const
std
::
vector
<
unsigned
int
>&
bodyPartPairs
,
const
unsigned
int
numberBodyParts
,
const
unsigned
int
numberBodyPartPairs
,
const
unsigned
int
subsetCounterIndex
);
const
unsigned
int
numberBodyPartPairs
,
const
unsigned
int
subsetCounterIndex
,
const
Array
<
T
>&
precomputedPAFs
=
Array
<
T
>
());
template
<
typename
T
>
OP_API
void
removeSubsetsBelowThresholds
(
std
::
vector
<
int
>&
validSubsetIndexes
,
int
&
numberPeople
,
...
...
src/openpose/pose/bodyPartConnectorBase.cpp
浏览文件 @
04362b9d
...
...
@@ -101,7 +101,7 @@ namespace op
const
T
*
const
heatMapPtr
,
const
T
*
const
peaksPtr
,
const
PoseModel
poseModel
,
const
Point
<
int
>&
heatMapSize
,
const
int
maxPeaks
,
const
T
interThreshold
,
const
T
interMinAboveThreshold
,
const
std
::
vector
<
unsigned
int
>&
bodyPartPairs
,
const
unsigned
int
numberBodyParts
,
const
unsigned
int
numberBodyPartPairs
,
const
unsigned
int
subsetCounterIndex
)
const
unsigned
int
numberBodyPartPairs
,
const
unsigned
int
subsetCounterIndex
,
const
Array
<
T
>&
precomputedPAFs
)
{
try
{
...
...
@@ -160,6 +160,9 @@ namespace op
auto
maxScoreIndex
=
-
1
;
if
(
poseModel
==
PoseModel
::
BODY_25E
&&
bodyPartPairsStar
[
bodyPartB
]
>
-
1
)
{
if
(
heatMapPtr
==
nullptr
)
error
(
"HeatMapPtr is null. GPU PAF not implemented for star architecture."
,
__LINE__
,
__FUNCTION__
,
__FILE__
);
const
auto
pairIndex2
=
bodyPartPairsStar
[
bodyPartB
];
const
auto
*
mapX0
=
heatMapPtr
+
(
numberBodyPartsAndBkg
+
pairIndex2
)
*
heatMapOffset
;
const
auto
*
mapY0
=
heatMapPtr
+
(
numberBodyPartsAndBkg
+
pairIndex2
+
1
)
*
heatMapOffset
;
...
...
@@ -293,6 +296,7 @@ namespace op
std
::
vector
<
std
::
tuple
<
double
,
int
,
int
>>
allABConnections
;
// Note: Problem of this function, if no right PAF between A and B, both elements are discarded.
// However, they should be added independently, not discarded
if
(
heatMapPtr
!=
nullptr
)
{
const
auto
*
mapX
=
heatMapPtr
+
(
numberBodyPartsAndBkg
+
mapIdx
[
2
*
pairIndex
])
*
heatMapOffset
;
const
auto
*
mapY
=
heatMapPtr
+
(
numberBodyPartsAndBkg
+
mapIdx
[
2
*
pairIndex
+
1
])
*
heatMapOffset
;
...
...
@@ -336,6 +340,25 @@ namespace op
}
}
}
else
if
(
!
precomputedPAFs
.
empty
())
{
for
(
auto
i
=
1
;
i
<=
numberA
;
i
++
)
{
// E.g. neck-nose connection. For each nose
for
(
auto
j
=
1
;
j
<=
numberB
;
j
++
)
{
T
scoreAB
=
precomputedPAFs
.
at
({(
int
)
pairIndex
,
i
+
(
int
)
bodyPartA
,
j
+
(
int
)
bodyPartB
});
// E.g. neck-nose connection. If possible PAF between neck i, nose j --> add
// parts score + connection score
if
(
scoreAB
>
1e-6
)
allABConnections
.
emplace_back
(
std
::
make_tuple
(
scoreAB
,
i
,
j
));
}
}
//error("Not implemented", __LINE__, __FUNCTION__, __FILE__);
}
else
error
(
"Error. Should not reach here."
,
__LINE__
,
__FUNCTION__
,
__FILE__
);
// select the top minAB connection, assuming that each part occur only once
// sort rows in descending order based on parts + connection score
...
...
src/openpose/pose/bodyPartConnectorBase.cu
浏览文件 @
04362b9d
...
...
@@ -2,33 +2,153 @@
#include <openpose/pose/poseParameters.hpp>
#include <openpose/utilities/fastMath.hpp>
#include <openpose/pose/bodyPartConnectorBase.hpp>
#include <iostream>
namespace
op
{
/**
 * Device-side integer rounding helper.
 *
 * Adds 0.5 to the input and converts the result to int. The conversion truncates
 * toward zero, so this is round-to-nearest only for non-negative inputs
 * (e.g. -0.6 yields 0, not -1).
 *
 * @param a Value to round.
 * @return a + 0.5 truncated to int.
 */
template <typename T>
inline __device__ int intRoundGPU(const T a)
{
    const auto shifted = a + 0.5f;
    return static_cast<int>(shifted);
}
template
<
typename
T
>
void
connectBodyPartsGpu
(
Array
<
T
>&
poseKeypoints
,
Array
<
T
>&
poseScores
,
const
T
*
const
heatMapPtr
,
inline
__device__
T
process
(
const
T
*
bodyPartA
,
const
T
*
bodyPartB
,
const
T
*
mapX
,
const
T
*
mapY
,
const
int
heatmapWidth
,
const
int
heatmapHeight
,
const
T
interThreshold
=
T
(
0.05
),
const
T
interMinAboveThreshold
=
T
(
0.95
),
const
T
renderThreshold
=
T
(
0.05
))
{
T
finalOutput
=
-
1
;
if
(
bodyPartA
[
2
]
<
renderThreshold
||
bodyPartB
[
2
]
<
renderThreshold
)
return
finalOutput
;
const
auto
vectorAToBX
=
bodyPartB
[
0
]
-
bodyPartA
[
0
];
const
auto
vectorAToBY
=
bodyPartB
[
1
]
-
bodyPartA
[
1
];
const
auto
vectorAToBMax
=
max
(
abs
(
vectorAToBX
),
abs
(
vectorAToBY
));
const
auto
numberPointsInLine
=
max
(
5
,
min
(
25
,
intRoundGPU
(
sqrt
(
5
*
vectorAToBMax
))));
const
auto
vectorNorm
=
T
(
sqrt
(
vectorAToBX
*
vectorAToBX
+
vectorAToBY
*
vectorAToBY
));
if
(
vectorNorm
>
1e-6
)
{
const
auto
sX
=
bodyPartA
[
0
];
const
auto
sY
=
bodyPartA
[
1
];
const
auto
vectorAToBNormX
=
vectorAToBX
/
vectorNorm
;
const
auto
vectorAToBNormY
=
vectorAToBY
/
vectorNorm
;
auto
sum
=
0.
;
auto
count
=
0
;
const
auto
vectorAToBXInLine
=
vectorAToBX
/
numberPointsInLine
;
const
auto
vectorAToBYInLine
=
vectorAToBY
/
numberPointsInLine
;
for
(
auto
lm
=
0
;
lm
<
numberPointsInLine
;
lm
++
)
{
const
auto
mX
=
min
(
heatmapWidth
-
1
,
intRoundGPU
(
sX
+
lm
*
vectorAToBXInLine
));
const
auto
mY
=
min
(
heatmapHeight
-
1
,
intRoundGPU
(
sY
+
lm
*
vectorAToBYInLine
));
const
auto
idx
=
mY
*
heatmapWidth
+
mX
;
const
auto
score
=
(
vectorAToBNormX
*
mapX
[
idx
]
+
vectorAToBNormY
*
mapY
[
idx
]);
if
(
score
>
interThreshold
)
{
sum
+=
score
;
count
++
;
}
}
// // L2 Hack
// int l2Dist = (int)sqrt(pow(vectorAToBX,2) + pow(vectorAToBY,2));
// if (l2Dist <= 2)
// count = numberPointsInLine;
// parts score + connection score
if
(
count
/
(
float
)
numberPointsInLine
>
interMinAboveThreshold
)
finalOutput
=
sum
/
count
;
}
return
finalOutput
;
}
/**
 * Computes one connection score per (body-part pair, peak of part A, peak of part B)
 * by calling process() and stores it in finalOutputPtr.
 *
 * Thread mapping: x -> pair index i, y -> peak index j of part A, z -> peak index k of
 * part B.
 *
 * @param finalOutputPtr      Output scores, written at (i*poseMaxPeople + j)*poseMaxPeople + k.
 * @param heatMapPtr          Base of the network output; channel c starts at
 *                            c * heatmapWidth * heatmapHeight.
 * @param peaksPtr            Peaks, read as 3 values per peak with poseMaxPeople peaks
 *                            per body part.
 * @param bodyPartPairsPtr    Flattened (partA, partB) indices, 2 entries per pair.
 * @param mapIdxPtr           Flattened (X channel, Y channel) indices, 2 entries per pair.
 * @param poseMaxPeople       Per-part stride (in peaks) of peaksPtr and finalOutputPtr.
 * @param numberBodyPartPairs Number of body-part pairs.
 * @param heatmapWidth        Width of each heat map channel.
 * @param heatmapHeight       Height of each heat map channel.
 */
template <typename T>
__global__ void pafScoreKernel(T* finalOutputPtr, const T* const heatMapPtr, const T* const peaksPtr,
                               const unsigned int* const bodyPartPairsPtr,
                               const unsigned int* const mapIdxPtr,
                               const unsigned int poseMaxPeople, const int numberBodyPartPairs,
                               const int heatmapWidth, const int heatmapHeight)
{
    const auto i = (blockIdx.x * blockDim.x) + threadIdx.x;
    const auto j = (blockIdx.y * blockDim.y) + threadIdx.y;
    const auto k = (blockIdx.z * blockDim.z) + threadIdx.z;

    // Guard every dimension. The grid may be launched with more threads than there are
    // valid indices; with stride poseMaxPeople, j or k >= poseMaxPeople would alias the
    // neighbouring slot (racy duplicate write) and, for the last pair, write past the end
    // of finalOutputPtr.
    if (i < numberBodyPartPairs && j < poseMaxPeople && k < poseMaxPeople)
    {
        const int partA = bodyPartPairsPtr[i*2];
        const int partB = bodyPartPairsPtr[i*2 + 1];
        const int mapIdxX = mapIdxPtr[i*2];
        const int mapIdxY = mapIdxPtr[i*2 + 1];

        const T* const bodyPartA = peaksPtr + (partA*poseMaxPeople*3 + j*3);
        const T* const bodyPartB = peaksPtr + (partB*poseMaxPeople*3 + k*3);
        const T* const mapX = heatMapPtr + mapIdxX*heatmapWidth*heatmapHeight;
        const T* const mapY = heatMapPtr + mapIdxY*heatmapWidth*heatmapHeight;

        const T finalOutput = process(bodyPartA, bodyPartB, mapX, mapY, heatmapWidth, heatmapHeight);
        finalOutputPtr[(i*poseMaxPeople + j)*poseMaxPeople + k] = finalOutput;
    }
}
template
<
typename
T
>
void
connectBodyPartsGpu
(
Array
<
T
>&
poseKeypoints
,
Array
<
T
>&
poseScores
,
const
T
*
const
heatMapGpuPtr
,
const
T
*
const
peaksPtr
,
const
PoseModel
poseModel
,
const
Point
<
int
>&
heatMapSize
,
const
int
maxPeaks
,
const
T
interMinAboveThreshold
,
const
T
interThreshold
,
const
int
minSubsetCnt
,
const
T
minSubsetScore
,
const
T
scaleFactor
,
const
T
*
const
heatMapGpuPtr
,
const
T
*
const
peaksGpuPtr
)
const
T
*
const
peaksGpuPtr
)
{
try
{
// Parts Connection
const
auto
&
bodyPartPairs
=
getPosePartPairs
(
poseModel
);
const
auto
&
mapIdxOffset
=
getPoseMapIndex
(
poseModel
);
const
auto
numberBodyParts
=
getPoseNumberBodyParts
(
poseModel
);
const
auto
numberBodyPartPairs
=
bodyPartPairs
.
size
()
/
2
;
const
auto
subsetCounterIndex
=
numberBodyParts
;
// Update mapIdx
auto
mapIdx
=
mapIdxOffset
;
for
(
auto
&
i
:
mapIdx
)
i
+=
(
numberBodyParts
+
1
);
if
(
numberBodyParts
==
0
)
error
(
"Invalid value of numberBodyParts, it must be positive, not "
+
std
::
to_string
(
numberBodyParts
),
__LINE__
,
__FUNCTION__
,
__FILE__
);
// Upload required data to GPU
unsigned
int
*
bodyPartPairsGpuPtr
;
cudaMalloc
((
void
**
)
&
bodyPartPairsGpuPtr
,
bodyPartPairs
.
size
()
*
sizeof
(
unsigned
int
));
cudaMemcpy
(
bodyPartPairsGpuPtr
,
&
bodyPartPairs
[
0
],
bodyPartPairs
.
size
()
*
sizeof
(
unsigned
int
),
cudaMemcpyHostToDevice
);
unsigned
int
*
mapIdxGpuPtr
;
cudaMalloc
((
void
**
)
&
mapIdxGpuPtr
,
mapIdx
.
size
()
*
sizeof
(
unsigned
int
));
cudaMemcpy
(
mapIdxGpuPtr
,
&
mapIdx
[
0
],
mapIdx
.
size
()
*
sizeof
(
unsigned
int
),
cudaMemcpyHostToDevice
);
T
*
finalOutputGpuPtr
;
Array
<
T
>
finalOutputCpu
;
finalOutputCpu
.
reset
({(
int
)
numberBodyPartPairs
,
(
int
)
POSE_MAX_PEOPLE
,
(
int
)
POSE_MAX_PEOPLE
},
-
1
);
int
totalComputations
=
numberBodyPartPairs
*
POSE_MAX_PEOPLE
*
POSE_MAX_PEOPLE
;
cudaMalloc
((
void
**
)
&
finalOutputGpuPtr
,
totalComputations
*
sizeof
(
float
));
// Run Kernel
const
dim3
threadsPerBlock
{
4
,
8
,
8
};
//4 is good for BODY_25, 8 for COCO?
if
((
POSE_MAX_PEOPLE
+
1
)
%
threadsPerBlock
.
y
||
(
POSE_MAX_PEOPLE
+
1
)
%
threadsPerBlock
.
z
)
error
(
"Invalid value of POSE_MAX_PEOPLE, it must be multiple of 16, rather than "
+
std
::
to_string
(
POSE_MAX_PEOPLE
),
__LINE__
,
__FUNCTION__
,
__FILE__
);
int
pairBlocks
=
intRound
((
numberBodyPartPairs
/
threadsPerBlock
.
x
)
+
0.5
);
const
dim3
numBlocks
{(
unsigned
int
)
pairBlocks
,
(
POSE_MAX_PEOPLE
+
1
)
/
threadsPerBlock
.
y
,
(
POSE_MAX_PEOPLE
+
1
)
/
threadsPerBlock
.
z
};
pafScoreKernel
<<<
numBlocks
,
threadsPerBlock
>>>
(
finalOutputGpuPtr
,
heatMapGpuPtr
,
peaksGpuPtr
,
bodyPartPairsGpuPtr
,
mapIdxGpuPtr
,
POSE_MAX_PEOPLE
,
numberBodyPartPairs
,
heatMapSize
.
x
,
heatMapSize
.
y
);
cudaMemcpy
(
finalOutputCpu
.
getPtr
(),
finalOutputGpuPtr
,
totalComputations
*
sizeof
(
float
),
cudaMemcpyDeviceToHost
);
// std::vector<std::pair<std::vector<int>, double>> refers to:
// - std::vector<int>: [body parts locations, #body parts found]
// - double: subset score
const
T
*
const
tNullptr
=
nullptr
;
const
auto
subsets
=
generateInitialSubsets
(
heatMapP
tr
,
peaksPtr
,
poseModel
,
heatMapSize
,
maxPeaks
,
interThreshold
,
interMinAboveThreshold
,
bodyPartPairs
,
numberBodyParts
,
numberBodyPartPairs
,
subsetCounterIndex
);
tNullp
tr
,
peaksPtr
,
poseModel
,
heatMapSize
,
maxPeaks
,
interThreshold
,
interMinAboveThreshold
,
bodyPartPairs
,
numberBodyParts
,
numberBodyPartPairs
,
subsetCounterIndex
,
finalOutputCpu
);
// Delete people below the following thresholds:
// a) minSubsetCnt: removed if less than minSubsetCnt body parts
...
...
@@ -45,8 +165,9 @@ namespace op
peaksPtr
,
numberPeople
,
numberBodyParts
,
numberBodyPartPairs
);
// Differences w.r.t. CPU version for now
UNUSED
(
heatMapGpuPtr
);
UNUSED
(
peaksGpuPtr
);
cudaFree
(
bodyPartPairsGpuPtr
);
cudaFree
(
mapIdxGpuPtr
);
cudaFree
(
finalOutputGpuPtr
);
cudaCheck
(
__LINE__
,
__FUNCTION__
,
__FILE__
);
}
catch
(
const
std
::
exception
&
e
)
...
...
@@ -56,15 +177,15 @@ namespace op
}
template
void
connectBodyPartsGpu
(
Array
<
float
>
&
poseKeypoints
,
Array
<
float
>&
poseScores
,
const
float
*
const
heatMapPtr
,
const
float
*
const
peaksPtr
,
const
float
*
const
heatMap
Gpu
Ptr
,
const
float
*
const
peaksPtr
,
const
PoseModel
poseModel
,
const
Point
<
int
>&
heatMapSize
,
const
int
maxPeaks
,
const
float
interMinAboveThreshold
,
const
float
interThreshold
,
const
int
minSubsetCnt
,
const
float
minSubsetScore
,
const
float
scaleFactor
,
const
float
*
const
heatMapGpuPtr
,
const
float
*
const
peaksGpuPtr
);
const
float
*
const
peaksGpuPtr
);
template
void
connectBodyPartsGpu
(
Array
<
double
>
&
poseKeypoints
,
Array
<
double
>&
poseScores
,
const
double
*
const
heatMapPtr
,
const
double
*
const
peaksPtr
,
const
double
*
const
heatMap
Gpu
Ptr
,
const
double
*
const
peaksPtr
,
const
PoseModel
poseModel
,
const
Point
<
int
>&
heatMapSize
,
const
int
maxPeaks
,
const
double
interMinAboveThreshold
,
const
double
interThreshold
,
const
int
minSubsetCnt
,
const
double
minSubsetScore
,
const
double
scaleFactor
,
const
double
*
const
heatMapGpuPtr
,
const
double
*
const
peaksGpuPtr
);
const
double
*
const
peaksGpuPtr
);
}
src/openpose/pose/bodyPartConnectorCaffe.cpp
浏览文件 @
04362b9d
...
...
@@ -163,16 +163,14 @@ namespace op
{
#if defined USE_CAFFE && defined USE_CUDA
const
auto
heatMapsBlob
=
bottom
.
at
(
0
);
const
auto
*
const
heatMapsPtr
=
heatMapsBlob
->
cpu_data
();
const
auto
*
const
peaksPtr
=
bottom
.
at
(
1
)
->
cpu_data
();
const
auto
*
const
heatMapsGpuPtr
=
heatMapsBlob
->
gpu_data
();
const
auto
*
const
peaksGpuPtr
=
bottom
.
at
(
1
)
->
gpu_data
();
const
auto
maxPeaks
=
mTopSize
[
1
];
connectBodyPartsGpu
(
poseKeypoints
,
poseScores
,
heatMapsPtr
,
peaksPtr
,
mPoseModel
,
connectBodyPartsGpu
(
poseKeypoints
,
poseScores
,
heatMaps
Gpu
Ptr
,
peaksPtr
,
mPoseModel
,
Point
<
int
>
{
heatMapsBlob
->
shape
(
3
),
heatMapsBlob
->
shape
(
2
)},
maxPeaks
,
mInterMinAboveThreshold
,
mInterThreshold
,
mMinSubsetCnt
,
mMinSubsetScore
,
mScaleNetToOutput
,
heatMapsGpuPtr
,
peaksGpuPtr
);
mMinSubsetCnt
,
mMinSubsetScore
,
mScaleNetToOutput
,
peaksGpuPtr
);
#else
UNUSED
(
bottom
);
UNUSED
(
poseKeypoints
);
...
...
src/openpose/pose/poseExtractorCaffe.cpp
浏览文件 @
04362b9d
...
...
@@ -294,7 +294,6 @@ namespace op
upImpl
->
spBodyPartConnectorCaffe
->
setMinSubsetCnt
((
int
)
get
(
PoseProperty
::
ConnectMinSubsetCnt
));
upImpl
->
spBodyPartConnectorCaffe
->
setMinSubsetScore
((
float
)
get
(
PoseProperty
::
ConnectMinSubsetScore
));
// CUDA version not implemented yet
// #ifdef USE_CUDA
// upImpl->spBodyPartConnectorCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get(),
// upImpl->spPeaksBlob.get()},
...
...
@@ -304,6 +303,9 @@ namespace op
upImpl
->
spPeaksBlob
.
get
()},
mPoseKeypoints
,
mPoseScores
);
// #endif
#ifdef USE_CUDA
cudaCheck
(
__LINE__
,
__FUNCTION__
,
__FILE__
);
#endif
#else
UNUSED
(
inputNetData
);
UNUSED
(
inputDataSize
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录