Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
ed7113f3
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
9 个月 前同步成功
通知
200
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
ed7113f3
编写于
3月 28, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
change the vector output to numpy.array
上级
5ae57206
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
211 addition
and
256 deletion
+211
-256
dataset/voxceleb/voxceleb1.py
dataset/voxceleb/voxceleb1.py
+14
-10
dataset/voxceleb/voxceleb2.py
dataset/voxceleb/voxceleb2.py
+15
-14
demos/audio_searching/src/operations/load.py
demos/audio_searching/src/operations/load.py
+3
-2
demos/speaker_verification/README.md
demos/speaker_verification/README.md
+79
-99
demos/speaker_verification/README_cn.md
demos/speaker_verification/README_cn.md
+78
-98
examples/voxceleb/README.md
examples/voxceleb/README.md
+0
-6
examples/voxceleb/sv0/local/data_prepare.py
examples/voxceleb/sv0/local/data_prepare.py
+0
-1
paddleaudio/paddleaudio/datasets/__init__.py
paddleaudio/paddleaudio/datasets/__init__.py
+1
-1
paddleaudio/paddleaudio/datasets/rirs_noises.py
paddleaudio/paddleaudio/datasets/rirs_noises.py
+0
-4
paddleaudio/paddleaudio/datasets/voxceleb.py
paddleaudio/paddleaudio/datasets/voxceleb.py
+1
-3
paddlespeech/cli/vector/infer.py
paddlespeech/cli/vector/infer.py
+5
-3
paddlespeech/vector/exps/ecapa_tdnn/test.py
paddlespeech/vector/exps/ecapa_tdnn/test.py
+0
-2
paddlespeech/vector/io/augment.py
paddlespeech/vector/io/augment.py
+0
-2
paddlespeech/vector/io/signal_processing.py
paddlespeech/vector/io/signal_processing.py
+0
-2
tests/unit/vector/conftest.py
tests/unit/vector/conftest.py
+15
-1
tests/unit/vector/test_augment.py
tests/unit/vector/test_augment.py
+0
-8
未找到文件。
dataset/voxceleb/voxceleb1.py
浏览文件 @
ed7113f3
...
...
@@ -63,13 +63,15 @@ TEST_TARGET_DATA = "vox1_test_wav.zip vox1_test_wav.zip 185fdc63c3c739954633d503
TRIAL_BASE_URL
=
"https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/"
TRIAL_LIST
=
{
"veri_test.txt"
:
"29fc7cc1c5d59f0816dc15d6e8be60f7"
,
# voxceleb1
"veri_test2.txt"
:
"b73110731c9223c1461fe49cb48dddfc"
,
# voxceleb1(cleaned)
"list_test_hard.txt"
:
"21c341b6b2168eea2634df0fb4b8fff1"
,
# voxceleb1-H
"list_test_hard2.txt"
:
"857790e09d579a68eb2e339a090343c8"
,
# voxceleb1-H(cleaned)
"list_test_all.txt"
:
"b9ecf7aa49d4b656aa927a8092844e4a"
,
# voxceleb1-E
"list_test_all2.txt"
:
"a53e059deb562ffcfc092bf5d90d9f3a"
# voxceleb1-E(cleaned)
}
"veri_test.txt"
:
"29fc7cc1c5d59f0816dc15d6e8be60f7"
,
# voxceleb1
"veri_test2.txt"
:
"b73110731c9223c1461fe49cb48dddfc"
,
# voxceleb1(cleaned)
"list_test_hard.txt"
:
"21c341b6b2168eea2634df0fb4b8fff1"
,
# voxceleb1-H
"list_test_hard2.txt"
:
"857790e09d579a68eb2e339a090343c8"
,
# voxceleb1-H(cleaned)
"list_test_all.txt"
:
"b9ecf7aa49d4b656aa927a8092844e4a"
,
# voxceleb1-E
"list_test_all2.txt"
:
"a53e059deb562ffcfc092bf5d90d9f3a"
# voxceleb1-E(cleaned)
}
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
parser
.
add_argument
(
...
...
@@ -176,6 +178,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
# create the manifest file
create_manifest
(
data_dir
=
target_dir
,
manifest_path_prefix
=
manifest_path
)
def
prepare_trial
(
base_url
,
data_list
,
target_dir
):
if
not
os
.
path
.
exists
(
target_dir
):
os
.
makedirs
(
target_dir
)
...
...
@@ -185,10 +188,12 @@ def prepare_trial(base_url, data_list, target_dir):
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
target_dir
,
trial
)):
download_url
=
" --no-check-certificate "
+
base_url
+
"/"
+
trial
download
(
url
=
download_url
,
md5sum
=
md5sum
,
target_dir
=
target_dir
)
def
main
():
if
args
.
target_dir
.
startswith
(
'~'
):
args
.
target_dir
=
os
.
path
.
expanduser
(
args
.
target_dir
)
# prepare the vox1 dev data
prepare_dataset
(
base_url
=
BASE_URL
,
...
...
@@ -209,8 +214,7 @@ def main():
prepare_trial
(
base_url
=
TRIAL_BASE_URL
,
data_list
=
TRIAL_LIST
,
target_dir
=
os
.
path
.
dirname
(
args
.
manifest_prefix
)
)
target_dir
=
os
.
path
.
dirname
(
args
.
manifest_prefix
))
print
(
"Manifest prepare done!"
)
...
...
dataset/voxceleb/voxceleb2.py
浏览文件 @
ed7113f3
...
...
@@ -22,12 +22,10 @@ import codecs
import
glob
import
json
import
os
import
subprocess
from
pathlib
import
Path
import
soundfile
from
utils.utility
import
check_md5sum
from
utils.utility
import
download
from
utils.utility
import
unzip
...
...
@@ -40,9 +38,8 @@ BASE_URL = "--no-check-certificate https://www.robots.ox.ac.uk/~vgg/data/voxcele
DEV_DATA_URL
=
BASE_URL
+
'/vox2_aac.zip'
DEV_MD5SUM
=
"bbc063c46078a602ca71605645c2a402"
# test data
TEST_DATA_URL
=
BASE_URL
+
'/vox2_test_aac.zip'
TEST_DATA_URL
=
BASE_URL
+
'/vox2_test_aac.zip'
TEST_MD5SUM
=
"0d2b3ea430a821c33263b5ea37ede312"
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
...
...
@@ -56,14 +53,16 @@ parser.add_argument(
default
=
"manifest"
,
type
=
str
,
help
=
"Filepath prefix for output manifests. (default: %(default)s)"
)
parser
.
add_argument
(
"--download"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Download the voxceleb2 dataset. (default: %(default)s)"
)
parser
.
add_argument
(
"--generate"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Generate the manifest files. (default: %(default)s)"
)
parser
.
add_argument
(
"--download"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Download the voxceleb2 dataset. (default: %(default)s)"
)
parser
.
add_argument
(
"--generate"
,
default
=
False
,
action
=
"store_true"
,
help
=
"Generate the manifest files. (default: %(default)s)"
)
args
=
parser
.
parse_args
()
...
...
@@ -138,7 +137,7 @@ def download_dataset(url, md5sum, target_dir, dataset):
def
main
():
if
args
.
target_dir
.
startswith
(
'~'
):
args
.
target_dir
=
os
.
path
.
expanduser
(
args
.
target_dir
)
# download and unpack the vox2-dev data
print
(
"download: {}"
.
format
(
args
.
download
))
if
args
.
download
:
...
...
@@ -157,7 +156,9 @@ def main():
print
(
"VoxCeleb2 download is done!"
)
if
args
.
generate
:
create_manifest
(
args
.
target_dir
,
manifest_path_prefix
=
args
.
manifest_prefix
)
create_manifest
(
args
.
target_dir
,
manifest_path_prefix
=
args
.
manifest_prefix
)
if
__name__
==
'__main__'
:
main
()
demos/audio_searching/src/operations/load.py
浏览文件 @
ed7113f3
...
...
@@ -26,8 +26,9 @@ def get_audios(path):
"""
supported_formats
=
[
".wav"
,
".mp3"
,
".ogg"
,
".flac"
,
".m4a"
]
return
[
item
for
sublist
in
[[
os
.
path
.
join
(
dir
,
file
)
for
file
in
files
]
for
dir
,
_
,
files
in
list
(
os
.
walk
(
path
))]
item
for
sublist
in
[[
os
.
path
.
join
(
dir
,
file
)
for
file
in
files
]
for
dir
,
_
,
files
in
list
(
os
.
walk
(
path
))]
for
item
in
sublist
if
os
.
path
.
splitext
(
item
)[
1
]
in
supported_formats
]
...
...
demos/speaker_verification/README.md
浏览文件 @
ed7113f3
...
...
@@ -46,56 +46,46 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
Output:
```
bash
demo
{
'dim'
: 192,
'embedding'
: array
([
-5
.749211 , 9.505463 ,
-8
.200284 ,
-5
.2075014 ,
5.3940268 ,
-3
.04878 , 1.611095 , 10.127234 ,
-10
.534177 ,
-15
.821609 , 1.2032688 ,
-0
.35080156,
1.2629458 ,
-12
.643498 ,
-2
.5758228 ,
-11
.343508 ,
2.3385992 ,
-8
.719341 , 14.213509 , 15.404744 ,
-0
.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 ,
-8
.77959 , 7.0576906 , 4.648855 ,
-1
.3089896 ,
-23
.294737 , 8.013747 , 13.891729 ,
-9
.926753 , 5.655307 ,
-5
.9422326 ,
-22
.842539 ,
0.6293588 ,
-18
.46266 ,
-10
.811862 , 9.8192625 ,
3.0070958 , 3.8072643 ,
-2
.3861165 , 3.0821571 ,
-14
.739942 , 1.7594414 ,
-0
.6485091 , 4.485623 ,
2.0207152 , 7.264915 ,
-6
.40137 , 23.63524 ,
2.9711294 ,
-22
.708025 , 9.93719 , 20.354511 ,
-10
.324688 ,
-0
.700492 ,
-8
.783211 ,
-5
.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 ,
-2
.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 ,
-2
.1832209 ,
-23
.908653 ,
-6
.4799504 ,
-4
.5365124 ,
-9
.224193 ,
14.568347 ,
-10
.568833 , 4.982321 ,
-4
.342062 ,
0.0914714 , 12.645902 ,
-5
.74285 ,
-3
.2141201 ,
-2
.7173362 ,
-6
.680575 , 0.4757669 ,
-5
.035051 ,
-6
.7964664 , 16.865469 ,
-11
.54324 , 7.681869 ,
0.44475392, 9.708182 ,
-8
.932846 , 0.4123232 ,
-4
.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 ,
-18
.078873 ,
6.2983274 ,
-0
.7500531 ,
-2
.725033 ,
-7
.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2
.8873312 , 7.1352735 ,
-16
.79663 , 18.495346 ,
-14
.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4
.875734 ,
-3
.0836344 ,
-2
.9999814 , 13.751918 ,
6.448228 ,
-11
.924197 , 2.171869 , 2.0423572 ,
-6
.173772 , 10.778437 , 25.77281 ,
-4
.9495463 ,
14.57806 , 0.3044315 , 2.6132357 ,
-7
.591999 ,
-2
.076944 , 9.025118 , 1.7834753 ,
-3
.1799617 ,
-4
.9401326 , 23.465864 , 5.1685796 ,
-9
.018578 ,
9.037825 ,
-4
.4150195 , 6.859591 ,
-12
.274467 ,
-0
.88911164, 5.186309 ,
-3
.9988663 ,
-13
.638606 ,
-9
.925445 ,
-0
.06329413,
-3
.6709652 ,
-12
.397416 ,
-12
.719869 ,
-1
.395601 , 2.1150916 , 5.7381287 ,
-4
.4691963 ,
-3
.82819 ,
-0
.84233856,
-1
.1604277 ,
-13
.490127 , 8.731719 ,
-20
.778936 ,
-11
.495662 ,
5.8033476 ,
-4
.752041 , 10.833007 ,
-6
.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 ,
-9
.501399 , 7.2315617 ,
-6
.417456 , 1.3333273 , 11.872697 ,
-0
.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8
.704245 , 6.224871 ,
-3
.2701402 ,
-11
.508579
]
,
dtype
=
float32
)}
```
bash
demo
[
-5
.749211 9.505463
-8
.200284
-5
.2075014 5.3940268
-3
.04878 1.611095 10.127234
-10
.534177
-15
.821609
1.2032688
-0
.35080156 1.2629458
-12
.643498
-2
.5758228
-11
.343508 2.3385992
-8
.719341 14.213509 15.404744
-0
.39327756 6.338786 2.688887 8.7104025 17.469526
-8
.77959 7.0576906 4.648855
-1
.3089896
-23
.294737
8.013747 13.891729
-9
.926753 5.655307
-5
.9422326
-22
.842539 0.6293588
-18
.46266
-10
.811862 9.8192625
3.0070958 3.8072643
-2
.3861165 3.0821571
-14
.739942
1.7594414
-0
.6485091 4.485623 2.0207152 7.264915
-6
.40137 23.63524 2.9711294
-22
.708025 9.93719
20.354511
-10
.324688
-0
.700492
-8
.783211
-5
.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696
-2
.3826702 10.605882 3.9112158 3.1500628
15.859915
-2
.1832209
-23
.908653
-6
.4799504
-4
.5365124
-9
.224193 14.568347
-10
.568833 4.982321
-4
.342062
0.0914714 12.645902
-5
.74285
-3
.2141201
-2
.7173362
-6
.680575 0.4757669
-5
.035051
-6
.7964664 16.865469
-11
.54324 7.681869 0.44475392 9.708182
-8
.932846
0.4123232
-4
.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183
-18
.078873 6.2983274
-0
.7500531
-2
.725033
-7
.6027865 3.3404543 2.990815
4.010979 11.000591
-2
.8873312 7.1352735
-16
.79663
18.495346
-14
.293832 7.89578 2.2714825 22.976387
-4
.875734
-3
.0836344
-2
.9999814 13.751918 6.448228
-11
.924197 2.171869 2.0423572
-6
.173772 10.778437
25.77281
-4
.9495463 14.57806 0.3044315 2.6132357
-7
.591999
-2
.076944 9.025118 1.7834753
-3
.1799617
-4
.9401326 23.465864 5.1685796
-9
.018578 9.037825
-4
.4150195 6.859591
-12
.274467
-0
.88911164 5.186309
-3
.9988663
-13
.638606
-9
.925445
-0
.06329413
-3
.6709652
-12
.397416
-12
.719869
-1
.395601 2.1150916 5.7381287
-4
.4691963
-3
.82819
-0
.84233856
-1
.1604277
-13
.490127
8.731719
-20
.778936
-11
.495662 5.8033476
-4
.752041
10.833007
-6
.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036
-9
.501399 7.2315617
-6
.417456 1.3333273 11.872697
-0
.30664724 8.8845
6.5569253 4.7948146 0.03662816
-8
.704245 6.224871
-3
.2701402
-11
.508579
]
```
-
Python API
...
...
@@ -118,55 +108,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
Output:
```
bash
# Vector Result:
{
'dim'
: 192,
'embedding'
: array
([
-5
.749211 , 9.505463 ,
-8
.200284 ,
-5
.2075014 ,
5.3940268 ,
-3
.04878 , 1.611095 , 10.127234 ,
-10
.534177 ,
-15
.821609 , 1.2032688 ,
-0
.35080156,
1.2629458 ,
-12
.643498 ,
-2
.5758228 ,
-11
.343508 ,
2.3385992 ,
-8
.719341 , 14.213509 , 15.404744 ,
-0
.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 ,
-8
.77959 , 7.0576906 , 4.648855 ,
-1
.3089896 ,
-23
.294737 , 8.013747 , 13.891729 ,
-9
.926753 , 5.655307 ,
-5
.9422326 ,
-22
.842539 ,
0.6293588 ,
-18
.46266 ,
-10
.811862 , 9.8192625 ,
3.0070958 , 3.8072643 ,
-2
.3861165 , 3.0821571 ,
-14
.739942 , 1.7594414 ,
-0
.6485091 , 4.485623 ,
2.0207152 , 7.264915 ,
-6
.40137 , 23.63524 ,
2.9711294 ,
-22
.708025 , 9.93719 , 20.354511 ,
-10
.324688 ,
-0
.700492 ,
-8
.783211 ,
-5
.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 ,
-2
.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 ,
-2
.1832209 ,
-23
.908653 ,
-6
.4799504 ,
-4
.5365124 ,
-9
.224193 ,
14.568347 ,
-10
.568833 , 4.982321 ,
-4
.342062 ,
0.0914714 , 12.645902 ,
-5
.74285 ,
-3
.2141201 ,
-2
.7173362 ,
-6
.680575 , 0.4757669 ,
-5
.035051 ,
-6
.7964664 , 16.865469 ,
-11
.54324 , 7.681869 ,
0.44475392, 9.708182 ,
-8
.932846 , 0.4123232 ,
-4
.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 ,
-18
.078873 ,
6.2983274 ,
-0
.7500531 ,
-2
.725033 ,
-7
.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2
.8873312 , 7.1352735 ,
-16
.79663 , 18.495346 ,
-14
.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4
.875734 ,
-3
.0836344 ,
-2
.9999814 , 13.751918 ,
6.448228 ,
-11
.924197 , 2.171869 , 2.0423572 ,
-6
.173772 , 10.778437 , 25.77281 ,
-4
.9495463 ,
14.57806 , 0.3044315 , 2.6132357 ,
-7
.591999 ,
-2
.076944 , 9.025118 , 1.7834753 ,
-3
.1799617 ,
-4
.9401326 , 23.465864 , 5.1685796 ,
-9
.018578 ,
9.037825 ,
-4
.4150195 , 6.859591 ,
-12
.274467 ,
-0
.88911164, 5.186309 ,
-3
.9988663 ,
-13
.638606 ,
-9
.925445 ,
-0
.06329413,
-3
.6709652 ,
-12
.397416 ,
-12
.719869 ,
-1
.395601 , 2.1150916 , 5.7381287 ,
-4
.4691963 ,
-3
.82819 ,
-0
.84233856,
-1
.1604277 ,
-13
.490127 , 8.731719 ,
-20
.778936 ,
-11
.495662 ,
5.8033476 ,
-4
.752041 , 10.833007 ,
-6
.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 ,
-9
.501399 , 7.2315617 ,
-6
.417456 , 1.3333273 , 11.872697 ,
-0
.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8
.704245 , 6.224871 ,
-3
.2701402 ,
-11
.508579
]
,
dtype
=
float32
)}
[
-5
.749211 9.505463
-8
.200284
-5
.2075014 5.3940268
-3
.04878 1.611095 10.127234
-10
.534177
-15
.821609
1.2032688
-0
.35080156 1.2629458
-12
.643498
-2
.5758228
-11
.343508 2.3385992
-8
.719341 14.213509 15.404744
-0
.39327756 6.338786 2.688887 8.7104025 17.469526
-8
.77959 7.0576906 4.648855
-1
.3089896
-23
.294737
8.013747 13.891729
-9
.926753 5.655307
-5
.9422326
-22
.842539 0.6293588
-18
.46266
-10
.811862 9.8192625
3.0070958 3.8072643
-2
.3861165 3.0821571
-14
.739942
1.7594414
-0
.6485091 4.485623 2.0207152 7.264915
-6
.40137 23.63524 2.9711294
-22
.708025 9.93719
20.354511
-10
.324688
-0
.700492
-8
.783211
-5
.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696
-2
.3826702 10.605882 3.9112158 3.1500628
15.859915
-2
.1832209
-23
.908653
-6
.4799504
-4
.5365124
-9
.224193 14.568347
-10
.568833 4.982321
-4
.342062
0.0914714 12.645902
-5
.74285
-3
.2141201
-2
.7173362
-6
.680575 0.4757669
-5
.035051
-6
.7964664 16.865469
-11
.54324 7.681869 0.44475392 9.708182
-8
.932846
0.4123232
-4
.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183
-18
.078873 6.2983274
-0
.7500531
-2
.725033
-7
.6027865 3.3404543 2.990815
4.010979 11.000591
-2
.8873312 7.1352735
-16
.79663
18.495346
-14
.293832 7.89578 2.2714825 22.976387
-4
.875734
-3
.0836344
-2
.9999814 13.751918 6.448228
-11
.924197 2.171869 2.0423572
-6
.173772 10.778437
25.77281
-4
.9495463 14.57806 0.3044315 2.6132357
-7
.591999
-2
.076944 9.025118 1.7834753
-3
.1799617
-4
.9401326 23.465864 5.1685796
-9
.018578 9.037825
-4
.4150195 6.859591
-12
.274467
-0
.88911164 5.186309
-3
.9988663
-13
.638606
-9
.925445
-0
.06329413
-3
.6709652
-12
.397416
-12
.719869
-1
.395601 2.1150916 5.7381287
-4
.4691963
-3
.82819
-0
.84233856
-1
.1604277
-13
.490127
8.731719
-20
.778936
-11
.495662 5.8033476
-4
.752041
10.833007
-6
.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036
-9
.501399 7.2315617
-6
.417456 1.3333273 11.872697
-0
.30664724 8.8845
6.5569253 4.7948146 0.03662816
-8
.704245 6.224871
-3
.2701402
-11
.508579
]
```
### 4.Pretrained Models
...
...
demos/speaker_verification/README_cn.md
浏览文件 @
ed7113f3
...
...
@@ -45,55 +45,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
输出:
```
bash
demo
{
'dim'
: 192,
'embedding'
: array
([
-5
.749211 , 9.505463 ,
-8
.200284 ,
-5
.2075014 ,
5.3940268 ,
-3
.04878 , 1.611095 , 10.127234 ,
-10
.534177 ,
-15
.821609 , 1.2032688 ,
-0
.35080156,
1.2629458 ,
-12
.643498 ,
-2
.5758228 ,
-11
.343508 ,
2.3385992 ,
-8
.719341 , 14.213509 , 15.404744 ,
-0
.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 ,
-8
.77959 , 7.0576906 , 4.648855 ,
-1
.3089896 ,
-23
.294737 , 8.013747 , 13.891729 ,
-9
.926753 , 5.655307 ,
-5
.9422326 ,
-22
.842539 ,
0.6293588 ,
-18
.46266 ,
-10
.811862 , 9.8192625 ,
3.0070958 , 3.8072643 ,
-2
.3861165 , 3.0821571 ,
-14
.739942 , 1.7594414 ,
-0
.6485091 , 4.485623 ,
2.0207152 , 7.264915 ,
-6
.40137 , 23.63524 ,
2.9711294 ,
-22
.708025 , 9.93719 , 20.354511 ,
-10
.324688 ,
-0
.700492 ,
-8
.783211 ,
-5
.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 ,
-2
.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 ,
-2
.1832209 ,
-23
.908653 ,
-6
.4799504 ,
-4
.5365124 ,
-9
.224193 ,
14.568347 ,
-10
.568833 , 4.982321 ,
-4
.342062 ,
0.0914714 , 12.645902 ,
-5
.74285 ,
-3
.2141201 ,
-2
.7173362 ,
-6
.680575 , 0.4757669 ,
-5
.035051 ,
-6
.7964664 , 16.865469 ,
-11
.54324 , 7.681869 ,
0.44475392, 9.708182 ,
-8
.932846 , 0.4123232 ,
-4
.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 ,
-18
.078873 ,
6.2983274 ,
-0
.7500531 ,
-2
.725033 ,
-7
.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2
.8873312 , 7.1352735 ,
-16
.79663 , 18.495346 ,
-14
.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4
.875734 ,
-3
.0836344 ,
-2
.9999814 , 13.751918 ,
6.448228 ,
-11
.924197 , 2.171869 , 2.0423572 ,
-6
.173772 , 10.778437 , 25.77281 ,
-4
.9495463 ,
14.57806 , 0.3044315 , 2.6132357 ,
-7
.591999 ,
-2
.076944 , 9.025118 , 1.7834753 ,
-3
.1799617 ,
-4
.9401326 , 23.465864 , 5.1685796 ,
-9
.018578 ,
9.037825 ,
-4
.4150195 , 6.859591 ,
-12
.274467 ,
-0
.88911164, 5.186309 ,
-3
.9988663 ,
-13
.638606 ,
-9
.925445 ,
-0
.06329413,
-3
.6709652 ,
-12
.397416 ,
-12
.719869 ,
-1
.395601 , 2.1150916 , 5.7381287 ,
-4
.4691963 ,
-3
.82819 ,
-0
.84233856,
-1
.1604277 ,
-13
.490127 , 8.731719 ,
-20
.778936 ,
-11
.495662 ,
5.8033476 ,
-4
.752041 , 10.833007 ,
-6
.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 ,
-9
.501399 , 7.2315617 ,
-6
.417456 , 1.3333273 , 11.872697 ,
-0
.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8
.704245 , 6.224871 ,
-3
.2701402 ,
-11
.508579
]
,
dtype
=
float32
)}
demo
[
-5
.749211 9.505463
-8
.200284
-5
.2075014 5.3940268
-3
.04878 1.611095 10.127234
-10
.534177
-15
.821609
1.2032688
-0
.35080156 1.2629458
-12
.643498
-2
.5758228
-11
.343508 2.3385992
-8
.719341 14.213509 15.404744
-0
.39327756 6.338786 2.688887 8.7104025 17.469526
-8
.77959 7.0576906 4.648855
-1
.3089896
-23
.294737
8.013747 13.891729
-9
.926753 5.655307
-5
.9422326
-22
.842539 0.6293588
-18
.46266
-10
.811862 9.8192625
3.0070958 3.8072643
-2
.3861165 3.0821571
-14
.739942
1.7594414
-0
.6485091 4.485623 2.0207152 7.264915
-6
.40137 23.63524 2.9711294
-22
.708025 9.93719
20.354511
-10
.324688
-0
.700492
-8
.783211
-5
.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696
-2
.3826702 10.605882 3.9112158 3.1500628
15.859915
-2
.1832209
-23
.908653
-6
.4799504
-4
.5365124
-9
.224193 14.568347
-10
.568833 4.982321
-4
.342062
0.0914714 12.645902
-5
.74285
-3
.2141201
-2
.7173362
-6
.680575 0.4757669
-5
.035051
-6
.7964664 16.865469
-11
.54324 7.681869 0.44475392 9.708182
-8
.932846
0.4123232
-4
.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183
-18
.078873 6.2983274
-0
.7500531
-2
.725033
-7
.6027865 3.3404543 2.990815
4.010979 11.000591
-2
.8873312 7.1352735
-16
.79663
18.495346
-14
.293832 7.89578 2.2714825 22.976387
-4
.875734
-3
.0836344
-2
.9999814 13.751918 6.448228
-11
.924197 2.171869 2.0423572
-6
.173772 10.778437
25.77281
-4
.9495463 14.57806 0.3044315 2.6132357
-7
.591999
-2
.076944 9.025118 1.7834753
-3
.1799617
-4
.9401326 23.465864 5.1685796
-9
.018578 9.037825
-4
.4150195 6.859591
-12
.274467
-0
.88911164 5.186309
-3
.9988663
-13
.638606
-9
.925445
-0
.06329413
-3
.6709652
-12
.397416
-12
.719869
-1
.395601 2.1150916 5.7381287
-4
.4691963
-3
.82819
-0
.84233856
-1
.1604277
-13
.490127
8.731719
-20
.778936
-11
.495662 5.8033476
-4
.752041
10.833007
-6
.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036
-9
.501399 7.2315617
-6
.417456 1.3333273 11.872697
-0
.30664724 8.8845
6.5569253 4.7948146 0.03662816
-8
.704245 6.224871
-3
.2701402
-11
.508579
]
```
-
Python API
...
...
@@ -116,55 +106,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
输出:
```
bash
# Vector Result:
{
'dim'
: 192,
'embedding'
: array
([
-5
.749211 , 9.505463 ,
-8
.200284 ,
-5
.2075014 ,
5.3940268 ,
-3
.04878 , 1.611095 , 10.127234 ,
-10
.534177 ,
-15
.821609 , 1.2032688 ,
-0
.35080156,
1.2629458 ,
-12
.643498 ,
-2
.5758228 ,
-11
.343508 ,
2.3385992 ,
-8
.719341 , 14.213509 , 15.404744 ,
-0
.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 ,
-8
.77959 , 7.0576906 , 4.648855 ,
-1
.3089896 ,
-23
.294737 , 8.013747 , 13.891729 ,
-9
.926753 , 5.655307 ,
-5
.9422326 ,
-22
.842539 ,
0.6293588 ,
-18
.46266 ,
-10
.811862 , 9.8192625 ,
3.0070958 , 3.8072643 ,
-2
.3861165 , 3.0821571 ,
-14
.739942 , 1.7594414 ,
-0
.6485091 , 4.485623 ,
2.0207152 , 7.264915 ,
-6
.40137 , 23.63524 ,
2.9711294 ,
-22
.708025 , 9.93719 , 20.354511 ,
-10
.324688 ,
-0
.700492 ,
-8
.783211 ,
-5
.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 ,
-2
.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 ,
-2
.1832209 ,
-23
.908653 ,
-6
.4799504 ,
-4
.5365124 ,
-9
.224193 ,
14.568347 ,
-10
.568833 , 4.982321 ,
-4
.342062 ,
0.0914714 , 12.645902 ,
-5
.74285 ,
-3
.2141201 ,
-2
.7173362 ,
-6
.680575 , 0.4757669 ,
-5
.035051 ,
-6
.7964664 , 16.865469 ,
-11
.54324 , 7.681869 ,
0.44475392, 9.708182 ,
-8
.932846 , 0.4123232 ,
-4
.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 ,
-18
.078873 ,
6.2983274 ,
-0
.7500531 ,
-2
.725033 ,
-7
.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2
.8873312 , 7.1352735 ,
-16
.79663 , 18.495346 ,
-14
.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4
.875734 ,
-3
.0836344 ,
-2
.9999814 , 13.751918 ,
6.448228 ,
-11
.924197 , 2.171869 , 2.0423572 ,
-6
.173772 , 10.778437 , 25.77281 ,
-4
.9495463 ,
14.57806 , 0.3044315 , 2.6132357 ,
-7
.591999 ,
-2
.076944 , 9.025118 , 1.7834753 ,
-3
.1799617 ,
-4
.9401326 , 23.465864 , 5.1685796 ,
-9
.018578 ,
9.037825 ,
-4
.4150195 , 6.859591 ,
-12
.274467 ,
-0
.88911164, 5.186309 ,
-3
.9988663 ,
-13
.638606 ,
-9
.925445 ,
-0
.06329413,
-3
.6709652 ,
-12
.397416 ,
-12
.719869 ,
-1
.395601 , 2.1150916 , 5.7381287 ,
-4
.4691963 ,
-3
.82819 ,
-0
.84233856,
-1
.1604277 ,
-13
.490127 , 8.731719 ,
-20
.778936 ,
-11
.495662 ,
5.8033476 ,
-4
.752041 , 10.833007 ,
-6
.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 ,
-9
.501399 , 7.2315617 ,
-6
.417456 , 1.3333273 , 11.872697 ,
-0
.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8
.704245 , 6.224871 ,
-3
.2701402 ,
-11
.508579
]
,
dtype
=
float32
)}
[
-5
.749211 9.505463
-8
.200284
-5
.2075014 5.3940268
-3
.04878 1.611095 10.127234
-10
.534177
-15
.821609
1.2032688
-0
.35080156 1.2629458
-12
.643498
-2
.5758228
-11
.343508 2.3385992
-8
.719341 14.213509 15.404744
-0
.39327756 6.338786 2.688887 8.7104025 17.469526
-8
.77959 7.0576906 4.648855
-1
.3089896
-23
.294737
8.013747 13.891729
-9
.926753 5.655307
-5
.9422326
-22
.842539 0.6293588
-18
.46266
-10
.811862 9.8192625
3.0070958 3.8072643
-2
.3861165 3.0821571
-14
.739942
1.7594414
-0
.6485091 4.485623 2.0207152 7.264915
-6
.40137 23.63524 2.9711294
-22
.708025 9.93719
20.354511
-10
.324688
-0
.700492
-8
.783211
-5
.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696
-2
.3826702 10.605882 3.9112158 3.1500628
15.859915
-2
.1832209
-23
.908653
-6
.4799504
-4
.5365124
-9
.224193 14.568347
-10
.568833 4.982321
-4
.342062
0.0914714 12.645902
-5
.74285
-3
.2141201
-2
.7173362
-6
.680575 0.4757669
-5
.035051
-6
.7964664 16.865469
-11
.54324 7.681869 0.44475392 9.708182
-8
.932846
0.4123232
-4
.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183
-18
.078873 6.2983274
-0
.7500531
-2
.725033
-7
.6027865 3.3404543 2.990815
4.010979 11.000591
-2
.8873312 7.1352735
-16
.79663
18.495346
-14
.293832 7.89578 2.2714825 22.976387
-4
.875734
-3
.0836344
-2
.9999814 13.751918 6.448228
-11
.924197 2.171869 2.0423572
-6
.173772 10.778437
25.77281
-4
.9495463 14.57806 0.3044315 2.6132357
-7
.591999
-2
.076944 9.025118 1.7834753
-3
.1799617
-4
.9401326 23.465864 5.1685796
-9
.018578 9.037825
-4
.4150195 6.859591
-12
.274467
-0
.88911164 5.186309
-3
.9988663
-13
.638606
-9
.925445
-0
.06329413
-3
.6709652
-12
.397416
-12
.719869
-1
.395601 2.1150916 5.7381287
-4
.4691963
-3
.82819
-0
.84233856
-1
.1604277
-13
.490127
8.731719
-20
.778936
-11
.495662 5.8033476
-4
.752041
10.833007
-6
.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036
-9
.501399 7.2315617
-6
.417456 1.3333273 11.872697
-0
.30664724 8.8845
6.5569253 4.7948146 0.03662816
-8
.704245 6.224871
-3
.2701402
-11
.508579
]
```
### 4.预训练模型
...
...
examples/voxceleb/README.md
浏览文件 @
ed7113f3
...
...
@@ -48,9 +48,3 @@ You can do the conversion using ffmpeg https://gist.github.com/seungwonpark/4f2
|VoxCeleb1-H(cleaned) |list_test_hard2.txt | 550894 | 275488 | 275406 |
|VoxCeleb1-E | list_test_all.txt | 581480 | 290743 | 290737 |
|VoxCeleb1-E(cleaned) | list_test_all2.txt |579818 |289921 |289897 |
examples/voxceleb/sv0/local/data_prepare.py
浏览文件 @
ed7113f3
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
os
import
paddle
from
yacs.config
import
CfgNode
...
...
paddleaudio/paddleaudio/datasets/__init__.py
浏览文件 @
ed7113f3
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
from
.esc50
import
ESC50
from
.gtzan
import
GTZAN
from
.rirs_noises
import
OpenRIRNoise
from
.tess
import
TESS
from
.urban_sound
import
UrbanSound8K
from
.voxceleb
import
VoxCeleb
from
.rirs_noises
import
OpenRIRNoise
paddleaudio/paddleaudio/datasets/rirs_noises.py
浏览文件 @
ed7113f3
...
...
@@ -13,12 +13,9 @@
# limitations under the License.
import
collections
import
csv
import
glob
import
os
import
random
from
typing
import
Dict
from
typing
import
List
from
typing
import
Tuple
from
paddle.io
import
Dataset
from
tqdm
import
tqdm
...
...
@@ -26,7 +23,6 @@ from tqdm import tqdm
from
..backends
import
load
as
load_audio
from
..backends
import
save
as
save_wav
from
..utils
import
DATA_HOME
from
..utils
import
decompress
from
..utils.download
import
download_and_decompress
from
.dataset
import
feat_funcs
...
...
paddleaudio/paddleaudio/datasets/voxceleb.py
浏览文件 @
ed7113f3
...
...
@@ -17,9 +17,7 @@ import glob
import
os
import
random
from
multiprocessing
import
cpu_count
from
typing
import
Dict
from
typing
import
List
from
typing
import
Tuple
from
paddle.io
import
Dataset
from
pathos.multiprocessing
import
Pool
...
...
@@ -135,7 +133,7 @@ class VoxCeleb(Dataset):
# so, we check the vox1/wav dir status
print
(
f
"wav base path:
{
self
.
wav_path
}
"
)
if
not
os
.
path
.
isdir
(
self
.
wav_path
):
print
(
f
"start to download the voxceleb1 dataset"
)
print
(
"start to download the voxceleb1 dataset"
)
download_and_decompress
(
# multi-zip parts concatenate to vox1_dev_wav.zip
self
.
archieves_audio_dev
,
self
.
base_path
,
...
...
paddlespeech/cli/vector/infer.py
浏览文件 @
ed7113f3
...
...
@@ -82,7 +82,10 @@ class VectorExecutor(BaseExecutor):
choices
=
[
"spk"
],
help
=
"task type in vector domain"
)
self
.
parser
.
add_argument
(
"--input"
,
type
=
str
,
default
=
None
,
help
=
"Audio file to extract embedding."
)
"--input"
,
type
=
str
,
default
=
None
,
help
=
"Audio file to extract embedding."
)
self
.
parser
.
add_argument
(
"--sample_rate"
,
type
=
int
,
...
...
@@ -344,8 +347,7 @@ class VectorExecutor(BaseExecutor):
Union[str, os.PathLike]: audio embedding info
"""
embedding
=
self
.
_outputs
[
"embedding"
]
dim
=
embedding
.
shape
[
0
]
return
{
"dim"
:
dim
,
"embedding"
:
embedding
}
return
embedding
def
preprocess
(
self
,
model_type
:
str
,
input_file
:
Union
[
str
,
os
.
PathLike
]):
"""Extract the audio feat
...
...
paddlespeech/vector/exps/ecapa_tdnn/test.py
浏览文件 @
ed7113f3
...
...
@@ -12,12 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
ast
import
os
import
numpy
as
np
import
paddle
import
paddle.nn.functional
as
F
from
paddle.io
import
BatchSampler
from
paddle.io
import
DataLoader
from
tqdm
import
tqdm
...
...
paddlespeech/vector/io/augment.py
浏览文件 @
ed7113f3
...
...
@@ -14,7 +14,6 @@
# this is modified from SpeechBrain
# https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/lobes/augment.py
import
math
import
os
from
typing
import
List
import
numpy
as
np
...
...
@@ -22,7 +21,6 @@ import paddle
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddleaudio
import
load
as
load_audio
from
paddleaudio.datasets.rirs_noises
import
OpenRIRNoise
from
paddlespeech.s2t.utils.log
import
Log
from
paddlespeech.vector.io.signal_processing
import
compute_amplitude
...
...
paddlespeech/vector/io/signal_processing.py
浏览文件 @
ed7113f3
...
...
@@ -11,8 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
numpy
as
np
import
paddle
...
...
tests/unit/vector/conftest.py
浏览文件 @
ed7113f3
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def
pytest_addoption
(
parser
):
parser
.
addoption
(
"--device"
,
action
=
"store"
,
default
=
"cpu"
)
...
...
@@ -8,4 +23,3 @@ def pytest_generate_tests(metafunc):
option_value
=
metafunc
.
config
.
option
.
device
if
"device"
in
metafunc
.
fixturenames
and
option_value
is
not
None
:
metafunc
.
parametrize
(
"device"
,
[
option_value
])
tests/unit/vector/test_augment.py
浏览文件 @
ed7113f3
...
...
@@ -11,15 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
numpy
as
np
import
paddle
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.io
import
BatchSampler
from
paddle.io
import
DataLoader
from
paddle.io
import
Dataset
def
test_add_noise
(
tmpdir
,
device
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录