source: python/lib/aubio/slicing.py @ 5c6a8587

feature/autosink, feature/cnn, feature/cnn_org, feature/constantq, feature/crepe, feature/crepe_org, feature/pitchshift, feature/pydocstrings, feature/timestretch, fix/ffmpeg5
Last change on this file since 5c6a8587 was 5c6a8587, checked in by Paul Brossier <piem@piem.org>, 5 years ago

[py] add docstring to slice_source_at_stamps

  • Property mode set to 100644
File size: 5.9 KB
Line 
1"""utility routines to slice sound files at given timestamps"""
2
3import os
4from aubio import source, sink
5
6_max_timestamp = 1e120
7
def slice_source_at_stamps(source_file, timestamps, timestamps_end=None,
                           output_dir=None, samplerate=0, hopsize=256,
                           create_first=False):
    """Slice a sound file at given timestamps.

    This function reads `source_file` and creates slices, new smaller
    files each starting at `t` in `timestamps`, a sequence of integers
    corresponding to time locations in `source_file`, in samples.

    If `timestamps_end` is unspecified, the slices will end at
    `timestamps_end[n] = timestamps[n+1]-1`, or the end of file.
    Otherwise, `timestamps_end` should be a sequence with the same
    length as `timestamps` containing the locations of the end of each
    slice. It may also be one element shorter, in which case the last
    slice runs until the end of file. End locations are inclusive: the
    sample found at `timestamps_end[n]` is the last one written to
    slice `n`.

    If `output_dir` is unspecified, the new slices will be written in
    the current directory. If `output_dir` is a string, new slices
    will be written in `output_dir`, after creating the directory if
    required.

    Each slice is written to a file named after the base name of
    `source_file`, followed by `_`, the start of the slice in seconds
    (zero-padded, 6 decimals) and the `.wav` extension.

    The default `samplerate` is 0, meaning the original sampling rate
    of `source_file` will be used. When using a sampling rate
    different to the one of the original files, `timestamps` and
    `timestamps_end` should be expressed in re-sampled samples.

    The `hopsize` parameter simply tells :class:`source` to use this
    hopsize and does not change the output slices.

    If `create_first` is True and `timestamps` does not start with `0`, the
    first slice from `0` to `timestamps[0] - 1` will be automatically added.

    Parameters
    ----------
    source_file : str
        path of the resource to slice
    timestamps : :obj:`list` of :obj:`int`
        time stamps at which to slice, in samples
    timestamps_end : :obj:`list` of :obj:`int` (optional)
        time stamps at which to end the slices
    output_dir : str (optional)
        output directory to write the slices to
    samplerate : int (optional)
        samplerate to read the file at
    hopsize : int (optional)
        number of samples read from source per iteration
    create_first : bool (optional)
        always create the slice at the start of the file

    Raises
    ------
    ValueError
        If `timestamps` is `None` or empty, or if the length of
        `timestamps_end` matches neither `len(timestamps)` nor
        `len(timestamps) - 1`.

    Examples
    --------
    Create two slices, the first second of a file and the rest of it:

    >>> aubio.slice_source_at_stamps('loop.wav', [0, 44100])

    Create one slice, from 1 second to 2 seconds:

    >>> aubio.slice_source_at_stamps('loop.wav', [44100], [44100*2])
    """

    if timestamps is None or len(timestamps) == 0:
        raise ValueError("no timestamps given")

    # Work on list copies: the `+` concatenations below would fail on
    # tuples and would add element-wise on array-like inputs.
    timestamps = list(timestamps)
    if timestamps_end is not None:
        timestamps_end = list(timestamps_end)

    if timestamps[0] != 0 and create_first:
        timestamps = [0] + timestamps
        if timestamps_end is not None:
            # the added first slice stops right before the next one starts
            timestamps_end = [timestamps[1] - 1] + timestamps_end

    if timestamps_end is not None:
        if len(timestamps_end) == len(timestamps) - 1:
            # no end given for the last slice: let it run to the end of file
            timestamps_end = timestamps_end + [_max_timestamp]
        elif len(timestamps_end) != len(timestamps):
            raise ValueError("len(timestamps_end) != len(timestamps)")
    else:
        timestamps_end = [t - 1 for t in timestamps[1:]] + [_max_timestamp]

    # (start, end) pairs, consumed in the order they were given
    regions = list(zip(timestamps, timestamps_end))

    source_base_name, _ = os.path.splitext(os.path.basename(source_file))
    if output_dir is not None:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        source_base_name = os.path.join(output_dir, source_base_name)

    def new_sink_name(source_base_name, timestamp, samplerate):
        """ create a sink name from a timestamp in samples, converted in seconds """
        timestamp_seconds = timestamp / float(samplerate)
        return source_base_name + "_%011.6f" % timestamp_seconds + '.wav'

    # open source file
    _source = source(source_file, samplerate, hopsize)
    # use the rate the source was actually opened at (matters when the
    # `samplerate` argument was 0)
    samplerate = _source.samplerate
    channels = _source.channels

    total_frames = 0    # number of frames consumed before the current hop
    next_region = 0     # index of the first region not yet started
    slices = []         # slices currently being written

    try:
        while True:
            # get hopsize new samples from source
            vec, read = _source.do_multi()

            # start every region beginning before the end of this hop
            while (next_region < len(regions)
                   and total_frames + read >= regions[next_region][0]):
                start_stamp, end_stamp = regions[next_region]
                next_region += 1
                # create a name for the sink, then the sink itself
                new_sink_path = new_sink_name(source_base_name, start_stamp,
                                              samplerate)
                _sink = sink(new_sink_path, samplerate, channels)
                slices.append({'start_stamp': start_stamp,
                               'end_stamp': end_stamp,
                               'sink': _sink})

            still_open = []
            for current_slice in slices:
                _sink = current_slice['sink']
                # sample index to start writing from in the new vector
                start = max(current_slice['start_stamp'] - total_frames, 0)
                # number of samples of this hop located before the
                # (inclusive) end of the region
                remaining = current_slice['end_stamp'] - total_frames + 1
                # never write past what was actually read
                stop = min(remaining, read)
                if stop > start:
                    _sink.do_multi(vec[:, start:stop], stop - start)
                if remaining <= read:
                    # the region ends within this hop: close its file
                    _sink.close()
                else:
                    # more samples to come, keep the slice for the next hop
                    still_open.append(current_slice)
            slices = still_open

            total_frames += read
            # a short read means the end of the source was reached
            if read < hopsize:
                break
    finally:
        # close the slices still being written (they run until the end of
        # the file), then release the source
        for current_slice in slices:
            current_slice['sink'].close()
        _source.close()
Note: See TracBrowser for help on using the repository browser.