source: python/lib/aubio/slicing.py @ e836160

feature/cnnfeature/crepefix/ffmpeg5
Last change on this file since e836160 was 2c8ada6, checked in by Paul Brossier <piem@piem.org>, 6 years ago

[py] improve code style of slicing.py

  • Property mode set to 100644
File size: 6.1 KB
RevLine 
[8b56b18]1"""utility routines to slice sound files at given timestamps"""
2
[88432a9]3import os
[8b56b18]4from aubio import source, sink
[88432a9]5
# Sentinel end stamp, far larger than any real sample index: a slice whose
# end is set to this value is written until the end of the source file.
_max_timestamp = 1e120
[f36ecea]7
[2c8ada6]8
def slice_source_at_stamps(source_file, timestamps, timestamps_end=None,
                           output_dir=None, samplerate=0, hopsize=256,
                           create_first=False):
    """Slice a sound file at given timestamps.

    This function reads `source_file` and creates slices, new smaller
    files each starting at `t` in `timestamps`, a sequence of integers
    corresponding to time locations in `source_file`, in samples.

    If `timestamps_end` is unspecified, the slices will end at
    `timestamps_end[n] = timestamps[n+1]-1`, or the end of file.
    Otherwise, `timestamps_end` should be a sequence with the same length
    as `timestamps` containing the locations of the end of each slice.
    End locations are inclusive: the sample at `timestamps_end[n]` is
    the last sample written to slice `n`.

    If `output_dir` is unspecified, the new slices will be written in
    the current directory. If `output_dir` is a string, new slices
    will be written in `output_dir`, after creating the directory if
    required.

    The default `samplerate` is 0, meaning the original sampling rate
    of `source_file` will be used. When using a sampling rate
    different to the one of the original files, `timestamps` and
    `timestamps_end` should be expressed in the re-sampled signal.

    The `hopsize` parameter simply tells :class:`source` to use this
    hopsize and does not change the output slices.

    If `create_first` is True and `timestamps` does not start with `0`, the
    first slice from `0` to `timestamps[0] - 1` will be automatically added.

    Parameters
    ----------
    source_file : str
        path of the resource to slice
    timestamps : :obj:`list` of :obj:`int`
        time stamps at which to slice, in samples
    timestamps_end : :obj:`list` of :obj:`int` (optional)
        time stamps at which to end the slices
    output_dir : str (optional)
        output directory to write the slices to
    samplerate : int (optional)
        samplerate to read the file at
    hopsize : int (optional)
        number of samples read from source per iteration
    create_first : bool (optional)
        always create the slice at the start of the file

    Raises
    ------
    ValueError
        If `timestamps` is `None` or empty, or if `timestamps_end` is
        neither as long as `timestamps` nor exactly one element shorter.

    Examples
    --------
    Create two slices: the first slice starts at the beginning of the
    input file `loop.wav` and lasts exactly one second, starting at
    sample `0` and ending at sample `44099`; the second slice starts
    at sample `44100` and lasts until the end of the input file:

    >>> aubio.slice_source_at_stamps('loop.wav', [0, 44100])

    Create one slice, from 1 second to 2 seconds:

    >>> aubio.slice_source_at_stamps('loop.wav', [44100], [44100 * 2 - 1])

    Notes
    -----
    Slices may be overlapping. If `timestamps_end` is `1` element
    shorter than `timestamps`, the last slice will end at the end of
    the file.

    Each output file is named after the base name of `source_file`,
    followed by the start of the slice in seconds and `.wav`.

    Every sink opened by this function, as well as the source, is
    closed before the function returns or raises.
    """

    if timestamps is None or len(timestamps) == 0:
        raise ValueError("no timestamps given")

    # work on list copies: this accepts tuples and array-likes, and makes
    # sure the `+` below always means list concatenation
    timestamps = list(timestamps)
    if timestamps_end is not None:
        timestamps_end = list(timestamps_end)

    if timestamps[0] != 0 and create_first:
        timestamps = [0] + timestamps
        if timestamps_end is not None:
            # the added first slice stops right before the original first one
            timestamps_end = [timestamps[1] - 1] + timestamps_end

    if timestamps_end is not None:
        if len(timestamps_end) == len(timestamps) - 1:
            # last end stamp omitted: the last slice runs until end of file
            timestamps_end = timestamps_end + [_max_timestamp]
        elif len(timestamps_end) != len(timestamps):
            raise ValueError("len(timestamps_end) != len(timestamps)")
    else:
        # each slice ends one sample before the next one starts
        timestamps_end = [t - 1 for t in timestamps[1:]] + [_max_timestamp]

    regions = list(zip(timestamps, timestamps_end))

    source_base_name, _ = os.path.splitext(os.path.basename(source_file))
    if output_dir is not None:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        source_base_name = os.path.join(output_dir, source_base_name)

    def _new_sink_name(source_base_name, timestamp, samplerate):
        # create name based on a timestamp in samples, converted in seconds
        timestamp_seconds = timestamp / float(samplerate)
        return source_base_name + "_%011.6f" % timestamp_seconds + '.wav'

    # open source file
    _source = source(source_file, samplerate, hopsize)
    # use the effective rate, needed when `samplerate` was passed as 0
    samplerate = _source.samplerate

    # number of frames read from the source before the current hop
    total_frames = 0
    # index of the next region for which no sink has been opened yet
    next_region = 0
    # slices currently being written, as (start_stamp, end_stamp, sink)
    slices = []

    try:
        while True:
            # get hopsize new samples from source
            vec, read = _source.do_multi()
            # open a sink for each region starting no later than the end
            # of the frames read so far
            while (next_region < len(regions)
                   and total_frames + read >= regions[next_region][0]):
                start_stamp, end_stamp = regions[next_region]
                next_region += 1
                new_sink_path = _new_sink_name(source_base_name, start_stamp,
                                               samplerate)
                _sink = sink(new_sink_path, samplerate, _source.channels)
                slices.append((start_stamp, end_stamp, _sink))

            # iterate over a copy, finished slices are removed on the way
            for current_slice in list(slices):
                start_stamp, end_stamp, _sink = current_slice
                # sample index to start writing from in the new source vector
                start = max(start_stamp - total_frames, 0)
                # number of samples of this hop before the end of the region
                # (`end_stamp` is inclusive, hence the + 1)
                remaining = end_stamp - total_frames + 1
                # never write past the end of the region or of the hop
                stop = min(remaining, read)
                if stop > start:
                    _sink.do_multi(vec[:, start:stop], stop - start)
                if remaining <= read:
                    # the region ends within this hop: close its file and
                    # forget it. A region ending on the first sample of the
                    # next hop has remaining == read + 1 and is kept, so its
                    # last sample is not lost.
                    _sink.close()
                    slices.remove(current_slice)

            total_frames += read
            # a short read means the end of the source was reached
            if read < hopsize:
                break
    finally:
        # close the slices still open (reaching end of file, or interrupted
        # by an error), then the source itself
        for _, _, _sink in slices:
            _sink.close()
        _source.close()
Note: See TracBrowser for help on using the repository browser.