add audio file

193601a5 · chrisxu2014 · 602dcc81 · 193601a5 · 193601a5
展开全部隐藏空白更改
内联并排

Showing with 161 additions and 139 deletions

deep_speech_2/data_utils/audio.py deep_speech_2/data_utils/audio.py +113 -132

deep_speech_2/data_utils/speech.py deep_speech_2/data_utils/speech.py +48 -7

未找到文件。
--- a/deep_speech_2/data_utils/audio.py
+++ b/deep_speech_2/data_utils/audio.py
--- a/deep_speech_2/data_utils/speech.py
+++ b/deep_speech_2/data_utils/speech.py
@@ -67,20 +67,20 @@ class SpeechSegment(AudioSegment):

    @classmethod
    def concatenate(cls, *segments):
-        """Concatenate an arbitrary number of audio segments together.
+        """Concatenate an arbitrary number of speech segments together.

-        :param *segments: Input speech segments
+        :param *segments: Input speech segments.
        :type *segments: SpeechSegment
        :return: Speech segment instance.
        :rtype: SpeechSegment
-        :raises ValueError: If number of segments is zero, or if sample_rate
-                            not match between two audio segments
-        :raises TypeError: If item of segments is not Audiosegment instance
+        :raises ValueError: If the number of segments is zero, or if the 
+                            sample_rate of any two segments does not match.
+        :raises TypeError: If any segment is not an instance of this class.
        """
-        # Perform basic sanity-checks.
        if len(segments) == 0:
            raise ValueError("No audio segments are given to concatenate.")
        sample_rate = segments[0]._sample_rate
+        transcripts = ""
        for seg in segments:
            if sample_rate != seg._sample_rate:
                raise ValueError("Can't concatenate segments with "
@@ -88,8 +88,49 @@ class SpeechSegment(AudioSegment):
            if type(seg) is not cls:
                raise TypeError("Only speech segments of the same type "
                                "instance can be concatenated.")
+            transcripts += seg._transcript
        samples = np.concatenate([seg.samples for seg in segments])
-        return cls(samples, sample_rate, seg._transcript)
+        return cls(samples, sample_rate, transcripts)
+
+    @classmethod
+    def slice_from_file(cls, filepath, start=None, end=None, transcript=""):
+        """Loads a small section of a speech file without having to load
+        the entire file into memory, which can be incredibly wasteful.
+
+        :param filepath: Filepath or file object to audio file.
+        :type filepath: basestring|file
+        :param start: Start time in seconds. If start is negative, it wraps
+                      around from the end. If not provided, this function
+                      reads from the very beginning.
+        :type start: float
+        :param end: End time in seconds. If end is negative, it wraps around
+                    from the end. If not provided, the default behavior is
+                    to read to the end of the file.
+        :type end: float
+        :param transcript: Transcript text for the speech. If not provided,
+                           it defaults to an empty string.
+        :type transcript: basestring
+        :return: SpeechSegment instance of the specified slice of the input
+                 speech file.
+        :rtype: SpeechSegment
+        """
+        audio = AudioSegment.slice_from_file(filepath, start, end)
+        return cls(audio.samples, audio.sample_rate, transcript)
+
+    @classmethod
+    def make_silence(cls, duration, sample_rate):
+        """Builds a silent speech segment carrying an empty transcript.
+        The silent audio is produced by AudioSegment.make_silence.
+
+        :param duration: Length of silence in seconds.
+        :type duration: float
+        :param sample_rate: Sample rate.
+        :type sample_rate: float
+        :return: Silence of the given duration.
+        :rtype: SpeechSegment
+        """
+        silence = AudioSegment.make_silence(duration, sample_rate)
+        return cls(silence.samples, silence.sample_rate, "")

    @property
    def transcript(self):