未验证 提交 6944770f 编写于 作者: M Matheus Fillipe 提交者: GitHub

Export and replay generated wavs in toolbox (#402)

上级 f49f64f6
...@@ -7,6 +7,7 @@ numpy>=1.14.0 ...@@ -7,6 +7,7 @@ numpy>=1.14.0
scipy>=1.0.0 scipy>=1.0.0
tqdm tqdm
sounddevice sounddevice
SoundFile
Unidecode Unidecode
inflect inflect
PyQt5 PyQt5
......
...@@ -34,6 +34,9 @@ recognized_datasets = [ ...@@ -34,6 +34,9 @@ recognized_datasets = [
"VCTK-Corpus/wav48", "VCTK-Corpus/wav48",
] ]
# Maximum number of generated wavs to keep in memory
MAX_WAVES = 15
class Toolbox: class Toolbox:
def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem): def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem):
sys.excepthook = self.excepthook sys.excepthook = self.excepthook
...@@ -43,6 +46,10 @@ class Toolbox: ...@@ -43,6 +46,10 @@ class Toolbox:
self.current_generated = (None, None, None, None) # speaker_name, spec, breaks, wav self.current_generated = (None, None, None, None) # speaker_name, spec, breaks, wav
self.synthesizer = None # type: Synthesizer self.synthesizer = None # type: Synthesizer
self.current_wav = None
self.waves_list = []
self.waves_count = 0
self.waves_namelist = []
# Initialize the events and the interface # Initialize the events and the interface
self.ui = UI() self.ui = UI()
...@@ -82,8 +89,17 @@ class Toolbox: ...@@ -82,8 +89,17 @@ class Toolbox:
self.ui.play_button.clicked.connect(func) self.ui.play_button.clicked.connect(func)
self.ui.stop_button.clicked.connect(self.ui.stop) self.ui.stop_button.clicked.connect(self.ui.stop)
self.ui.record_button.clicked.connect(self.record) self.ui.record_button.clicked.connect(self.record)
#Audio
self.ui.setup_audio_devices(Synthesizer.sample_rate) self.ui.setup_audio_devices(Synthesizer.sample_rate)
#Wav playback & save
func = lambda: self.replay_last_wav()
self.ui.replay_wav_button.clicked.connect(func)
func = lambda: self.export_current_wave()
self.ui.export_wav_button.clicked.connect(func)
self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)
# Generation # Generation
func = lambda: self.synthesize() or self.vocode() func = lambda: self.synthesize() or self.vocode()
self.ui.generate_button.clicked.connect(func) self.ui.generate_button.clicked.connect(func)
...@@ -93,6 +109,15 @@ class Toolbox: ...@@ -93,6 +109,15 @@ class Toolbox:
# UMAP legend # UMAP legend
self.ui.clear_button.clicked.connect(self.clear_utterances) self.ui.clear_button.clicked.connect(self.clear_utterances)
def set_current_wav(self, index):
    """Make the wav stored at ``index`` of the history the current wav.

    This is connected to the ``currentIndexChanged`` signal of the waves
    combobox and is also called directly with 0 after a new wav has been
    inserted at the front of ``self.waves_list``.

    :param index: position in ``self.waves_list``. Out-of-range values are
        ignored and the current selection is left untouched.
    """
    # Qt reports -1 when the combobox has no current item. A negative index
    # would silently wrap around to the end of the list, and an index past
    # the end would raise IndexError, so both are ignored.
    if 0 <= index < len(self.waves_list):
        self.current_wav = self.waves_list[index]
def export_current_wave(self):
    """Ask the UI to save the currently selected generated wav to a file.

    Does nothing when no wav has been selected yet (``self.current_wav`` is
    initialised to None), instead of passing None on to the save dialog.
    """
    # Identity check on purpose: the wav may be an array-like object whose
    # truth value is ambiguous.
    if self.current_wav is None:
        return
    self.ui.save_audio_file(self.current_wav, Synthesizer.sample_rate)
def replay_last_wav(self):
    """Play the currently selected generated wav again through the UI.

    Does nothing when no wav has been selected yet (``self.current_wav`` is
    initialised to None), instead of handing None to the audio playback.
    """
    # Identity check on purpose: the wav may be an array-like object whose
    # truth value is ambiguous.
    if self.current_wav is None:
        return
    self.ui.play(self.current_wav, Synthesizer.sample_rate)
def reset_ui(self, encoder_models_dir, synthesizer_models_dir, vocoder_models_dir): def reset_ui(self, encoder_models_dir, synthesizer_models_dir, vocoder_models_dir):
self.ui.populate_browser(self.datasets_root, recognized_datasets, 0, True) self.ui.populate_browser(self.datasets_root, recognized_datasets, 0, True)
self.ui.populate_models(encoder_models_dir, synthesizer_models_dir, vocoder_models_dir) self.ui.populate_models(encoder_models_dir, synthesizer_models_dir, vocoder_models_dir)
...@@ -212,6 +237,30 @@ class Toolbox: ...@@ -212,6 +237,30 @@ class Toolbox:
wav = wav / np.abs(wav).max() * 0.97 wav = wav / np.abs(wav).max() * 0.97
self.ui.play(wav, Synthesizer.sample_rate) self.ui.play(wav, Synthesizer.sample_rate)
# Name it (history displayed in combobox)
# TODO better naming for the combobox items?
wav_name = str(self.waves_count + 1)
#Update waves combobox
self.waves_count += 1
if self.waves_count > MAX_WAVES:
self.waves_list.pop()
self.waves_namelist.pop()
self.waves_list.insert(0, wav)
self.waves_namelist.insert(0, wav_name)
self.ui.waves_cb.disconnect()
self.ui.waves_cb_model.setStringList(self.waves_namelist)
self.ui.waves_cb.setCurrentIndex(0)
self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)
# Update current wav
self.set_current_wav(0)
#Enable replay and save buttons:
self.ui.replay_wav_button.setDisabled(False)
self.ui.export_wav_button.setDisabled(False)
# Compute the embedding # Compute the embedding
# TODO: this is problematic with different sampling rates, gotta fix it # TODO: this is problematic with different sampling rates, gotta fix it
if not encoder.is_loaded(): if not encoder.is_loaded():
......
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure from matplotlib.figure import Figure
from PyQt5.QtCore import Qt from PyQt5.QtCore import Qt, QStringListModel
from PyQt5.QtWidgets import * from PyQt5.QtWidgets import *
from encoder.inference import plot_embedding_as_heatmap from encoder.inference import plot_embedding_as_heatmap
from toolbox.utterance import Utterance from toolbox.utterance import Utterance
from pathlib import Path from pathlib import Path
from typing import List, Set from typing import List, Set
import sounddevice as sd import sounddevice as sd
import soundfile as sf
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
# from sklearn.manifold import TSNE # You can try with TSNE if you like, I prefer UMAP # from sklearn.manifold import TSNE # You can try with TSNE if you like, I prefer UMAP
...@@ -137,7 +138,21 @@ class UI(QDialog): ...@@ -137,7 +138,21 @@ class UI(QDialog):
self.umap_ax.set_yticks([]) self.umap_ax.set_yticks([])
self.umap_ax.figure.canvas.draw() self.umap_ax.figure.canvas.draw()
def save_audio_file(self, wav, sample_rate):
    """Prompt for a destination path and write ``wav`` to it as an audio file.

    :param wav: audio samples, passed unchanged to ``soundfile.write``.
    :param sample_rate: sample rate passed unchanged to ``soundfile.write``.

    Nothing is written when the dialog returns an empty path.
    """
    # getSaveFileName is a static convenience function that builds its own
    # dialog, so options set on a separate QFileDialog instance (such as
    # setDefaultSuffix) never reach it. The default suffix is therefore
    # applied by hand below.
    fpath, _ = QFileDialog.getSaveFileName(
        parent=self,
        caption="Select a path to save the audio file",
        filter="Audio Files (*.flac *.wav)"
    )
    if not fpath:
        return

    # Default format is wav
    if Path(fpath).suffix == "":
        fpath += ".wav"
    sf.write(fpath, wav, sample_rate)
def setup_audio_devices(self, sample_rate):
input_devices = [] input_devices = []
output_devices = [] output_devices = []
for device in sd.query_devices(): for device in sd.query_devices():
...@@ -389,6 +404,8 @@ class UI(QDialog): ...@@ -389,6 +404,8 @@ class UI(QDialog):
self.generate_button.setDisabled(True) self.generate_button.setDisabled(True)
self.synthesize_button.setDisabled(True) self.synthesize_button.setDisabled(True)
self.vocode_button.setDisabled(True) self.vocode_button.setDisabled(True)
self.replay_wav_button.setDisabled(True)
self.export_wav_button.setDisabled(True)
[self.log("") for _ in range(self.max_log_lines)] [self.log("") for _ in range(self.max_log_lines)]
def __init__(self): def __init__(self):
...@@ -537,6 +554,22 @@ class UI(QDialog): ...@@ -537,6 +554,22 @@ class UI(QDialog):
layout.addWidget(self.vocode_button) layout.addWidget(self.vocode_button)
gen_layout.addLayout(layout) gen_layout.addLayout(layout)
#Replay & Save Audio
layout2 = QHBoxLayout()
self.replay_wav_button = QPushButton("Replay")
self.replay_wav_button.setToolTip("Replay last generated vocoder")
layout2.addWidget(self.replay_wav_button)
self.export_wav_button = QPushButton("Export")
self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file")
layout2.addWidget(self.export_wav_button)
self.waves_cb_model = QStringListModel()
self.waves_cb = QComboBox()
self.waves_cb.setModel(self.waves_cb_model)
self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting")
layout2.addWidget(self.waves_cb)
gen_layout.addLayout(layout2)
self.loading_bar = QProgressBar() self.loading_bar = QProgressBar()
gen_layout.addWidget(self.loading_bar) gen_layout.addWidget(self.loading_bar)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册