Export and replay generated wavs in toolbox (#402)

6944770f · Matheus Fillipe · GitHub · f49f64f6 · 6944770f · 6944770f
隐藏空白更改
内联并排

Showing 3 changed files with 85 additions and 2 deletions

requirements.txt requirements.txt +1 -0

toolbox/__init__.py toolbox/__init__.py +49 -0

toolbox/ui.py toolbox/ui.py +35 -2

未找到文件。
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,7 @@ numpy>=1.14.0
 scipy>=1.0.0
 tqdm
 sounddevice
+SoundFile
 Unidecode
 inflect
 PyQt5

--- a/toolbox/__init__.py
+++ b/toolbox/__init__.py
@@ -34,6 +34,9 @@ recognized_datasets = [
    "VCTK-Corpus/wav48",
 ]

+#Maximum number of generated wavs to keep in memory
+MAX_WAVES = 15
+
 class Toolbox:
    def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem):
        sys.excepthook = self.excepthook
@@ -43,6 +46,10 @@ class Toolbox:
        self.current_generated = (None, None, None, None) # speaker_name, spec, breaks, wav
        
        self.synthesizer = None # type: Synthesizer
+        self.current_wav = None
+        self.waves_list = []
+        self.waves_count = 0
+        self.waves_namelist = []
        
        # Initialize the events and the interface
        self.ui = UI()
@@ -82,8 +89,17 @@ class Toolbox:
        self.ui.play_button.clicked.connect(func)
        self.ui.stop_button.clicked.connect(self.ui.stop)
        self.ui.record_button.clicked.connect(self.record)
+
+        #Audio
        self.ui.setup_audio_devices(Synthesizer.sample_rate)

+        #Wav playback & save
+        func = lambda: self.replay_last_wav()
+        self.ui.replay_wav_button.clicked.connect(func)
+        func = lambda: self.export_current_wave()
+        self.ui.export_wav_button.clicked.connect(func)
+        self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)
+
        # Generation
        func = lambda: self.synthesize() or self.vocode()
        self.ui.generate_button.clicked.connect(func)
@@ -93,6 +109,15 @@ class Toolbox:
        # UMAP legend
        self.ui.clear_button.clicked.connect(self.clear_utterances)

+    def set_current_wav(self, index):
+        """Make the wav at position `index` of the history the current one.
+
+        Connected to the `currentIndexChanged` signal of the waves combobox,
+        and also called directly after a new wav has been generated.
+
+        :param index: position in `self.waves_list` (0 is the most recent wav).
+            Indices outside the list are ignored and the current wav is kept.
+        """
+        # Qt emits currentIndexChanged with -1 when the combobox is emptied or
+        # reset; a negative index would otherwise silently pick from the end of
+        # the list, and an out-of-range one would raise IndexError.
+        if not 0 <= index < len(self.waves_list):
+            return
+        self.current_wav = self.waves_list[index]
+
+    def export_current_wave(self):
+        """Hand the currently selected wav to the UI's save-file routine."""
+        wav, sample_rate = self.current_wav, Synthesizer.sample_rate
+        self.ui.save_audio_file(wav, sample_rate)
+
+    def replay_last_wav(self):
+        """Play the currently selected wav through the UI's audio output."""
+        wav, sample_rate = self.current_wav, Synthesizer.sample_rate
+        self.ui.play(wav, sample_rate)
+
    def reset_ui(self, encoder_models_dir, synthesizer_models_dir, vocoder_models_dir):
        self.ui.populate_browser(self.datasets_root, recognized_datasets, 0, True)
        self.ui.populate_models(encoder_models_dir, synthesizer_models_dir, vocoder_models_dir)
@@ -212,6 +237,30 @@ class Toolbox:
        wav = wav / np.abs(wav).max() * 0.97
        self.ui.play(wav, Synthesizer.sample_rate)

+        # Name it (history displayed in combobox)
+        # TODO better naming for the combobox items?
+        wav_name = str(self.waves_count + 1)
+
+        #Update waves combobox
+        self.waves_count += 1
+        if self.waves_count > MAX_WAVES:
+          self.waves_list.pop()
+          self.waves_namelist.pop()
+        self.waves_list.insert(0, wav)
+        self.waves_namelist.insert(0, wav_name)
+
+        self.ui.waves_cb.disconnect()
+        self.ui.waves_cb_model.setStringList(self.waves_namelist)
+        self.ui.waves_cb.setCurrentIndex(0)
+        self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)
+
+        # Update current wav
+        self.set_current_wav(0)
+        
+        #Enable replay and save buttons:
+        self.ui.replay_wav_button.setDisabled(False)
+        self.ui.export_wav_button.setDisabled(False)
+
        # Compute the embedding
        # TODO: this is problematic with different sampling rates, gotta fix it
        if not encoder.is_loaded():

--- a/toolbox/ui.py
+++ b/toolbox/ui.py
 from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
 from matplotlib.figure import Figure
-from PyQt5.QtCore import Qt
+from PyQt5.QtCore import Qt, QStringListModel
 from PyQt5.QtWidgets import *
 from encoder.inference import plot_embedding_as_heatmap
 from toolbox.utterance import Utterance
 from pathlib import Path
 from typing import List, Set
 import sounddevice as sd
+import soundfile as sf
 import matplotlib.pyplot as plt
 import numpy as np
 # from sklearn.manifold import TSNE         # You can try with TSNE if you like, I prefer UMAP 
@@ -137,7 +138,21 @@ class UI(QDialog):
        self.umap_ax.set_yticks([])
        self.umap_ax.figure.canvas.draw()

-    def setup_audio_devices(self,sample_rate):
+    def save_audio_file(self, wav, sample_rate):
+        """Ask the user for a destination path and write `wav` to it.
+
+        Nothing is written if the dialog returns an empty path. A path
+        without an extension gets ".wav" appended.
+
+        :param wav: audio samples, passed as-is to `soundfile.write`.
+        :param sample_rate: sample rate passed to `soundfile.write`.
+        """
+        # getSaveFileName is a static convenience function that creates its own
+        # dialog, so options set on a separate QFileDialog instance (such as a
+        # default suffix) never reach it; call it on the class directly.
+        fpath, _ = QFileDialog.getSaveFileName(
+            parent=self,
+            caption="Select a path to save the audio file",
+            filter="Audio Files (*.flac *.wav)"
+        )
+        if not fpath:
+            return
+
+        #Default format is wav
+        if Path(fpath).suffix == "":
+            fpath += ".wav"
+        sf.write(fpath, wav, sample_rate)
+
+    def setup_audio_devices(self, sample_rate):
        input_devices = []
        output_devices = []
        for device in sd.query_devices():
@@ -389,6 +404,8 @@ class UI(QDialog):
        self.generate_button.setDisabled(True)
        self.synthesize_button.setDisabled(True)
        self.vocode_button.setDisabled(True)
+        self.replay_wav_button.setDisabled(True)
+        self.export_wav_button.setDisabled(True)
        [self.log("") for _ in range(self.max_log_lines)]

    def __init__(self):
@@ -537,6 +554,22 @@ class UI(QDialog):
        layout.addWidget(self.vocode_button)
        gen_layout.addLayout(layout)

+
+        #Replay & Save Audio
+        layout2 = QHBoxLayout()
+        self.replay_wav_button = QPushButton("Replay")
+        self.replay_wav_button.setToolTip("Replay last generated vocoder")
+        layout2.addWidget(self.replay_wav_button)
+        self.export_wav_button = QPushButton("Export")
+        self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file")
+        layout2.addWidget(self.export_wav_button)
+        self.waves_cb_model = QStringListModel()
+        self.waves_cb = QComboBox()
+        self.waves_cb.setModel(self.waves_cb_model)
+        self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting")
+        layout2.addWidget(self.waves_cb)
+        gen_layout.addLayout(layout2)
+
        self.loading_bar = QProgressBar()
        gen_layout.addWidget(self.loading_bar)