source: python/lib/aubio/slicing.py @ 5ce504d

feature/autosinkfeature/cnnfeature/cnn_orgfeature/constantqfeature/crepefeature/crepe_orgfeature/pitchshiftfeature/pydocstringsfeature/timestretchfix/ffmpeg5
Last change on this file since 5ce504d was c9c012e, checked in by Paul Brossier <piem@piem.org>, 6 years ago

[py] [slicing] add option create_first, default to False

  • Property mode set to 100644
File size: 3.8 KB
RevLine 
[8b56b18]1"""utility routines to slice sound files at given timestamps"""
2
[88432a9]3import os
[8b56b18]4from aubio import source, sink
[88432a9]5
# Sentinel end stamp meaning "until the end of the source file": it is larger
# than any frame count a real file can reach.
_max_timestamp = 1e120


def _build_regions(timestamps, timestamps_end, create_first):
    """Pair each start stamp with its inclusive end stamp.

    Returns a list of ``(start_stamp, end_stamp)`` tuples. Raises
    ``ValueError`` when no start stamp is given or when the number of end
    stamps is neither ``len(timestamps)`` nor ``len(timestamps) - 1``.
    """
    # `len()` rather than truthiness: the truth value of a numpy array with
    # more than one element is ambiguous.
    if timestamps is None or len(timestamps) == 0:
        raise ValueError("no timestamps given")

    # Work on plain lists. `[0] + timestamps` raises TypeError on a tuple and
    # silently becomes an element-wise addition on a numpy array.
    timestamps = list(timestamps)
    if timestamps_end is not None:
        timestamps_end = list(timestamps_end)

    if create_first and timestamps[0] != 0:
        # Add a leading region covering everything before the first stamp.
        timestamps = [0] + timestamps
        if timestamps_end is not None:
            timestamps_end = [timestamps[1] - 1] + timestamps_end

    if timestamps_end is None:
        # Each region stops one sample before the next one starts; the last
        # region is left open-ended.
        timestamps_end = [t - 1 for t in timestamps[1:]] + [_max_timestamp]
    elif len(timestamps_end) == len(timestamps) - 1:
        # Only the end of the last region is missing: leave it open-ended.
        timestamps_end = timestamps_end + [_max_timestamp]
    elif len(timestamps_end) != len(timestamps):
        raise ValueError("len(timestamps_end) != len(timestamps)")

    return list(zip(timestamps, timestamps_end))


def _sink_path(source_base_name, timestamp, samplerate):
    """Name a slice after its start stamp (in samples) converted to seconds."""
    timestamp_seconds = timestamp / float(samplerate)
    return source_base_name + "_%011.6f" % timestamp_seconds + '.wav'


def slice_source_at_stamps(source_file, timestamps, timestamps_end=None,
                           output_dir=None, samplerate=0, hopsize=256,
                           create_first=False):
    """Slice a sound file at given timestamps.

    One ``.wav`` file is written per region. Each file is named
    ``<source base name>_<start in seconds, formatted with %011.6f>.wav``.

    Args:
        source_file: path of the sound file to slice.
        timestamps: non-empty sequence of region start stamps, in samples.
        timestamps_end: optional sequence of region end stamps, in samples.
            End stamps are inclusive. It may have one item less than
            `timestamps`, in which case the last region is open-ended. When
            omitted, each region ends one sample before the next one starts.
        output_dir: directory receiving the slices; created if it does not
            exist. When None, slice names are built from the base name of
            `source_file` only, i.e. relative to the working directory.
        samplerate: sampling rate handed to `aubio.source`. The rate actually
            reported by the opened source is used for naming and writing
            (NOTE(review): 0 presumably selects the file's native rate, see
            the aubio `source` documentation).
        hopsize: number of frames read from the source per iteration.
        create_first: if True and the first stamp is not 0, also create a
            region going from 0 up to the sample before the first stamp.

    Raises:
        ValueError: if `timestamps` is None or empty, or if the length of
            `timestamps_end` does not match the length of `timestamps`.
    """
    regions = _build_regions(timestamps, timestamps_end, create_first)

    source_base_name, _ = os.path.splitext(os.path.basename(source_file))
    if output_dir is not None:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        source_base_name = os.path.join(output_dir, source_base_name)

    # open source file
    _source = source(source_file, samplerate, hopsize)
    samplerate = _source.samplerate

    total_frames = 0   # frames read before the current block
    next_region = 0    # index of the first region that is not opened yet
    slices = []        # regions whose sink is currently open

    try:
        while True:
            # get hopsize new samples from source
            vec, read = _source.do_multi()
            block_end = total_frames + read

            # Open a sink for every region starting within this block. A
            # region starting exactly at `block_end` is opened one block
            # early; nothing is written to it during this iteration.
            while (next_region < len(regions)
                   and block_end >= regions[next_region][0]):
                start_stamp, end_stamp = regions[next_region]
                next_region += 1
                new_sink_path = _sink_path(source_base_name, start_stamp,
                                           samplerate)
                _sink = sink(new_sink_path, samplerate, _source.channels)
                slices.append({'start_stamp': start_stamp,
                               'end_stamp': end_stamp,
                               'sink': _sink})

            # Iterate over a copy: finished slices are removed on the fly.
            for current_slice in list(slices):
                start_stamp = current_slice['start_stamp']
                end_stamp = current_slice['end_stamp']
                _sink = current_slice['sink']
                # sample index to start writing from in the new vector
                start = max(start_stamp - total_frames, 0)
                # number of samples of this block up to the (inclusive) end
                remaining = end_stamp - total_frames + 1
                # never write past the samples actually read
                stop = remaining if remaining < read else read
                if stop > start:
                    _sink.do_multi(vec[:, start:stop], stop - start)
                if end_stamp < block_end:
                    # The last sample of the region was part of this block.
                    # Forget the slice before closing it so that a failing
                    # close() is not retried by the cleanup below.
                    slices.remove(current_slice)
                    _sink.close()
                # Otherwise the slice stays open. This includes the case
                # end_stamp == block_end: its last sample is the first sample
                # of the next block.

            total_frames = block_end
            if read < hopsize:
                # a short read means the end of the source was reached
                break
    finally:
        # Close what is still open: regions running up to the end of the
        # file, and the source itself.
        try:
            for current_slice in slices:
                current_slice['sink'].close()
        finally:
            _source.close()
Note: See TracBrowser for help on using the repository browser.