未验证 提交 6944770f 编写于 作者: M Matheus Fillipe 提交者: GitHub

Export and replay generated wavs in toolbox (#402)

上级 f49f64f6
......@@ -34,6 +34,9 @@ recognized_datasets = [
"VCTK-Corpus/wav48",
]
#Maximum of generated wavs to keep on memory
MAX_WAVES = 15
class Toolbox:
def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem):
sys.excepthook = self.excepthook
......@@ -43,6 +46,10 @@ class Toolbox:
self.current_generated = (None, None, None, None) # speaker_name, spec, breaks, wav
self.synthesizer = None # type: Synthesizer
self.current_wav = None
self.waves_list = []
self.waves_count = 0
self.waves_namelist = []
# Initialize the events and the interface
self.ui = UI()
......@@ -82,8 +89,17 @@ class Toolbox:
self.ui.play_button.clicked.connect(func)
self.ui.stop_button.clicked.connect(self.ui.stop)
self.ui.record_button.clicked.connect(self.record)
#Audio
self.ui.setup_audio_devices(Synthesizer.sample_rate)
#Wav playback & save
func = lambda: self.replay_last_wav()
self.ui.replay_wav_button.clicked.connect(func)
func = lambda: self.export_current_wave()
self.ui.export_wav_button.clicked.connect(func)
self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)
# Generation
func = lambda: self.synthesize() or self.vocode()
self.ui.generate_button.clicked.connect(func)
......@@ -93,6 +109,15 @@ class Toolbox:
# UMAP legend
self.ui.clear_button.clicked.connect(self.clear_utterances)
def set_current_wav(self, index):
self.current_wav = self.waves_list[index]
def export_current_wave(self):
self.ui.save_audio_file(self.current_wav, Synthesizer.sample_rate)
def replay_last_wav(self):
self.ui.play(self.current_wav, Synthesizer.sample_rate)
def reset_ui(self, encoder_models_dir, synthesizer_models_dir, vocoder_models_dir):
self.ui.populate_browser(self.datasets_root, recognized_datasets, 0, True)
self.ui.populate_models(encoder_models_dir, synthesizer_models_dir, vocoder_models_dir)
......@@ -212,6 +237,30 @@ class Toolbox:
wav = wav / np.abs(wav).max() * 0.97
self.ui.play(wav, Synthesizer.sample_rate)
# Name it (history displayed in combobox)
# TODO better naming for the combobox items?
wav_name = str(self.waves_count + 1)
#Update waves combobox
self.waves_count += 1
if self.waves_count > MAX_WAVES:
self.waves_list.pop()
self.waves_namelist.pop()
self.waves_list.insert(0, wav)
self.waves_namelist.insert(0, wav_name)
self.ui.waves_cb.disconnect()
self.ui.waves_cb_model.setStringList(self.waves_namelist)
self.ui.waves_cb.setCurrentIndex(0)
self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)
# Update current wav
self.set_current_wav(0)
#Enable replay and save buttons:
self.ui.replay_wav_button.setDisabled(False)
self.ui.export_wav_button.setDisabled(False)
# Compute the embedding
# TODO: this is problematic with different sampling rates, gotta fix it
if not encoder.is_loaded():
......
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
from PyQt5.QtCore import Qt
from PyQt5.QtCore import Qt, QStringListModel
from PyQt5.QtWidgets import *
from encoder.inference import plot_embedding_as_heatmap
from toolbox.utterance import Utterance
from pathlib import Path
from typing import List, Set
import sounddevice as sd
import soundfile as sf
import matplotlib.pyplot as plt
import numpy as np
# from sklearn.manifold import TSNE # You can try with TSNE if you like, I prefer UMAP
......@@ -137,7 +138,21 @@ class UI(QDialog):
self.umap_ax.set_yticks([])
self.umap_ax.figure.canvas.draw()
def setup_audio_devices(self,sample_rate):
def save_audio_file(self, wav, sample_rate):
dialog = QFileDialog()
dialog.setDefaultSuffix(".wav")
fpath, _ = dialog.getSaveFileName(
parent=self,
caption="Select a path to save the audio file",
filter="Audio Files (*.flac *.wav)"
)
if fpath:
#Default format is wav
if Path(fpath).suffix == "":
fpath += ".wav"
sf.write(fpath, wav, sample_rate)
def setup_audio_devices(self, sample_rate):
input_devices = []
output_devices = []
for device in sd.query_devices():
......@@ -389,6 +404,8 @@ class UI(QDialog):
self.generate_button.setDisabled(True)
self.synthesize_button.setDisabled(True)
self.vocode_button.setDisabled(True)
self.replay_wav_button.setDisabled(True)
self.export_wav_button.setDisabled(True)
[self.log("") for _ in range(self.max_log_lines)]
def __init__(self):
......@@ -537,6 +554,22 @@ class UI(QDialog):
layout.addWidget(self.vocode_button)
gen_layout.addLayout(layout)
#Replay & Save Audio
layout2 = QHBoxLayout()
self.replay_wav_button = QPushButton("Replay")
self.replay_wav_button.setToolTip("Replay last generated vocoder")
layout2.addWidget(self.replay_wav_button)
self.export_wav_button = QPushButton("Export")
self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file")
layout2.addWidget(self.export_wav_button)
self.waves_cb_model = QStringListModel()
self.waves_cb = QComboBox()
self.waves_cb.setModel(self.waves_cb_model)
self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting")
layout2.addWidget(self.waves_cb)
gen_layout.addLayout(layout2)
self.loading_bar = QProgressBar()
gen_layout.addWidget(self.loading_bar)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册