1 | #! /usr/bin/env python |
---|
2 | # -*- coding: utf-8 -*- |
---|
3 | |
---|
4 | """aubio command line tool |
---|
5 | |
---|
6 | This file was written by Paul Brossier <piem@aubio.org> and is released under |
---|
7 | the GNU/GPL v3. |
---|
8 | |
---|
9 | Note: this script is mostly about parsing command line arguments. For more |
---|
10 | readable code examples, check out the `python/demos` folder.""" |
---|
11 | |
---|
12 | import sys |
---|
13 | import argparse |
---|
14 | import aubio |
---|
15 | |
---|
def aubio_parser():
    """Build the argument parser of the aubio command line tool.

    A global ``-V/--version`` flag is registered on the top-level parser,
    followed by one sub-command per analysis task. The name of the selected
    sub-command is stored in ``command`` and each sub-command stores the
    class used to process the input in the ``process`` default.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        epilog='use "%(prog)s <command> --help" for more info about each command')
    parser.add_argument('-V', '--version', dest="show_version",
                        action="store_true", help="show version")

    commands = parser.add_subparsers(title='commands', dest='command',
                                     metavar="")

    # onset subcommand
    onset_cmd = commands.add_parser(
        'onset',
        help='estimate time of onsets (beginning of sound event)',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser_add_input(onset_cmd)
    parser_add_buf_hop_size(onset_cmd)
    parser_add_method(
        onset_cmd,
        helpstr=("onset novelty function"
                 " <default|energy|hfc|complex|phase|specdiff|kl|mkl|specflux>"))
    parser_add_threshold(onset_cmd)
    parser_add_silence(onset_cmd)
    parser_add_minioi(onset_cmd)
    parser_add_time_format(onset_cmd)
    parser_add_verbose_help(onset_cmd)
    onset_cmd.set_defaults(process=process_onset)

    # pitch subcommand
    pitch_cmd = commands.add_parser(
        'pitch',
        help='estimate fundamental frequency (monophonic)')
    parser_add_input(pitch_cmd)
    parser_add_buf_hop_size(pitch_cmd, buf_size=2048)
    parser_add_method(
        pitch_cmd,
        helpstr="pitch detection method <default|yinfft|yin|mcomb|fcomb|schmitt>")
    parser_add_threshold(pitch_cmd)
    parser_add_silence(pitch_cmd)
    parser_add_time_format(pitch_cmd)
    parser_add_verbose_help(pitch_cmd)
    pitch_cmd.set_defaults(process=process_pitch)

    # beat subcommand
    beat_cmd = commands.add_parser(
        'beat',
        help='estimate location of beats')
    parser_add_input(beat_cmd)
    parser_add_buf_hop_size(beat_cmd, buf_size=1024, hop_size=512)
    parser_add_time_format(beat_cmd)
    parser_add_verbose_help(beat_cmd)
    beat_cmd.set_defaults(process=process_beat)

    # tempo subcommand
    tempo_cmd = commands.add_parser(
        'tempo',
        help='estimate overall tempo in bpm')
    parser_add_input(tempo_cmd)
    parser_add_buf_hop_size(tempo_cmd, buf_size=1024, hop_size=512)
    parser_add_time_format(tempo_cmd)
    parser_add_verbose_help(tempo_cmd)
    tempo_cmd.set_defaults(process=process_tempo)

    # notes subcommand
    notes_cmd = commands.add_parser(
        'notes',
        help='estimate midi-like notes (monophonic)')
    parser_add_input(notes_cmd)
    parser_add_buf_hop_size(notes_cmd)
    parser_add_time_format(notes_cmd)
    parser_add_verbose_help(notes_cmd)
    notes_cmd.set_defaults(process=process_notes)

    # mfcc subcommand
    mfcc_cmd = commands.add_parser(
        'mfcc',
        help='extract Mel-Frequency Cepstrum Coefficients')
    parser_add_input(mfcc_cmd)
    parser_add_buf_hop_size(mfcc_cmd)
    parser_add_time_format(mfcc_cmd)
    parser_add_verbose_help(mfcc_cmd)
    mfcc_cmd.set_defaults(process=process_mfcc)

    # melbands subcommand
    melbands_cmd = commands.add_parser(
        'melbands',
        help='extract energies in Mel-frequency bands')
    parser_add_input(melbands_cmd)
    parser_add_buf_hop_size(melbands_cmd)
    parser_add_time_format(melbands_cmd)
    parser_add_verbose_help(melbands_cmd)
    melbands_cmd.set_defaults(process=process_melbands)

    return parser
---|
100 | |
---|
def parser_add_input(parser):
    """Register the input selection arguments on ``parser``.

    Adds an optional positional ``source_uri``, the ``-i/--input`` option
    (stored as ``source_uri2``) and the ``-r/--samplerate`` integer option,
    which defaults to 0.
    """
    uri_help = "input sound file to analyse"
    uri_metavar = "<source_uri>"
    # the source can be given either as a positional or through -i
    parser.add_argument("source_uri", nargs='?', default=None,
                        metavar=uri_metavar, help=uri_help)
    parser.add_argument("-i", "--input", dest="source_uri2",
                        metavar=uri_metavar, help=uri_help)
    parser.add_argument(
        "-r", "--samplerate", dest="samplerate",
        action="store", type=int, default=0, metavar="<freq>",
        help="samplerate at which the file should be represented")
---|
110 | |
---|
def parser_add_verbose_help(parser):
    """Register the verbosity arguments on ``parser``.

    Both options write to ``verbose``: each ``-v/--verbose`` increments it,
    starting from the default of 1, and ``-q/--quiet`` sets it to 0.
    """
    verbose_opts = dict(action="count", dest="verbose", default=1,
                        help="make lots of noise [default]")
    quiet_opts = dict(action="store_const", dest="verbose", const=0,
                      help="be quiet")
    parser.add_argument("-v", "--verbose", **verbose_opts)
    parser.add_argument("-q", "--quiet", **quiet_opts)
---|
118 | |
---|
def parser_add_buf_hop_size(parser, buf_size=512, hop_size=256):
    """Register the ``-B/--bufsize`` and ``-H/--hopsize`` integer options.

    Args:
        parser: parser to add the options to.
        buf_size: default stored in ``buf_size`` and shown in the help text.
        hop_size: default stored in ``hop_size`` and shown in the help text.
    """
    size_opts = dict(action="store", type=int, metavar="<size>")
    parser.add_argument("-B", "--bufsize", dest="buf_size",
                        default=buf_size,
                        help="buffer size [default=%d]" % buf_size,
                        **size_opts)
    parser.add_argument("-H", "--hopsize", dest="hop_size",
                        default=hop_size,
                        help="overlap size [default=%d]" % hop_size,
                        **size_opts)
---|
128 | |
---|
def parser_add_method(parser, method='default', helpstr='method'):
    """Register the ``-m/--method`` string option on ``parser``.

    Args:
        parser: parser to add the option to.
        method: default value stored in ``method``.
        helpstr: description shown in the help, followed by the default.
    """
    description = "%s [default=%s]" % (helpstr, method)
    parser.add_argument("-m", "--method", dest="method",
                        action="store", type=str, default=method,
                        metavar="<method>", help=description)
---|
134 | |
---|
def parser_add_threshold(parser, default=None):
    """Register the ``-t/--threshold`` float option on ``parser``.

    Args:
        parser: parser to add the option to.
        default: value stored in ``threshold`` when the option is not
            given; it is also shown in the help text.
    """
    description = "threshold [default=%s]" % default
    parser.add_argument("-t", "--threshold", dest="threshold",
                        action="store", type=float, default=default,
                        metavar="<threshold>", help=description)
---|
140 | |
---|
def parser_add_silence(parser):
    """Register the ``-s/--silence`` float option, defaulting to -70."""
    silence_opts = dict(dest="silence", action="store", type=float,
                        default=-70, metavar="<value>",
                        help="silence threshold")
    parser.add_argument("-s", "--silence", **silence_opts)
---|
146 | |
---|
def parser_add_minioi(parser):
    """Register the ``-M/--minioi`` string option, defaulting to "12ms"."""
    minioi_opts = dict(dest="minioi", action="store", type=str,
                       default="12ms", metavar="<value>",
                       help="minimum Inter-Onset Interval")
    parser.add_argument("-M", "--minioi", **minioi_opts)
---|
152 | |
---|
def parser_add_time_format(parser):
    """Register the ``-T/--time-format`` option on ``parser``.

    The value is stored as given in ``time_format`` and defaults to None.
    """
    description = ("select time values output format (samples, ms, seconds)"
                   " [default=seconds]")
    parser.add_argument("-T", "--time-format", dest="time_format",
                        default=None, metavar='format', help=description)
---|
161 | |
---|
162 | # some utilities |
---|
163 | |
---|
def samples2seconds(n_frames, samplerate):
    """Format ``n_frames`` as seconds at ``samplerate``, followed by a tab."""
    seconds = n_frames / float(samplerate)
    return "%f\t" % seconds
---|
166 | |
---|
def samples2milliseconds(n_frames, samplerate):
    """Format ``n_frames`` as milliseconds at ``samplerate``, plus a tab."""
    milliseconds = 1000. * n_frames / float(samplerate)
    return "%f\t" % milliseconds
---|
169 | |
---|
def samples2samples(n_frames, samplerate):
    """Format ``n_frames`` as an integer followed by a tab.

    ``samplerate`` is unused; it is accepted so that all time formatters
    share the same signature.
    """
    template = "%d\t"
    return template % n_frames
---|
172 | |
---|
def timefunc(mode):
    """Return the time formatting function selected by ``mode``.

    Args:
        mode: None, 'seconds' or 's' for seconds; 'ms' or 'milliseconds'
            for milliseconds; 'samples' for a sample count.

    Raises:
        ValueError: if ``mode`` is none of the values above.
    """
    if mode in (None, 'seconds', 's'):
        return samples2seconds
    if mode in ('ms', 'milliseconds'):
        return samples2milliseconds
    if mode == 'samples':
        return samples2samples
    raise ValueError('invalid time format %s' % mode)
---|
182 | |
---|
183 | # definition of processing classes |
---|
184 | |
---|
class default_process(object):
    """Base class for the sub-command processors.

    Subclasses are expected to call ``parse_options`` before this
    constructor so that ``self.options`` can be reported in verbose mode.
    """

    def __init__(self, args):
        """Select the time formatter and optionally report the options.

        ``self.time2string`` is only set when ``args`` has a ``time_format``
        entry. The options are written to stderr when ``args.verbose`` is
        above 2 and ``self.options`` exists.
        """
        if 'time_format' in args:
            self.time2string = timefunc(args.time_format)
        if args.verbose > 2 and hasattr(self, 'options'):
            # class names look like process_<name>; keep the <name> part
            name = type(self).__name__.split('_')[1]
            sys.stderr.write(
                'running %s with options %r \n' % (name, self.options))

    def flush(self, n_frames, samplerate):
        """Hook optionally called at the end of process; does nothing."""

    def parse_options(self, args, valid_opts):
        """Store in ``self.options`` the entries of ``args`` listed in
        ``valid_opts``."""
        self.options = dict(
            (key, value) for key, value in vars(args).items()
            if key in valid_opts)

    def remap_pvoc_options(self, options):
        """Rename ``buf_size``/``hop_size`` to ``win_s``/``hop_s``.

        ``options`` is modified in place and then stored in
        ``self.options``.
        """
        # FIXME: we need to remap buf_size to win_s, hop_size to hop_s
        # adjust python/ext/py-phasevoc.c to understand buf_size/hop_size
        for old_key, new_key in (('buf_size', 'win_s'), ('hop_size', 'hop_s')):
            if old_key in options:
                options[new_key] = options.pop(old_key)
        self.options = options
---|
212 | |
---|
class process_onset(default_process):
    """Processor for the ``onset`` sub-command, wrapping ``aubio.onset``."""

    # command line arguments forwarded to the aubio.onset constructor
    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        """Create and configure the onset detector from ``args``.

        ``args.threshold`` and ``args.silence`` are applied only when they
        are not None. ``args.minioi`` is a string: a value ending in ``ms``
        is passed to ``set_minioi_ms``, one ending in ``s`` to
        ``set_minioi_s``, and anything else is converted to an integer and
        passed to ``set_minioi`` (presumably a number of samples, to be
        confirmed against the aubio documentation).
        """
        self.parse_options(args, self.valid_opts)
        self.onset = aubio.onset(**self.options)
        if args.threshold is not None:
            self.onset.set_threshold(args.threshold)
        if args.minioi:
            # test the 'ms' suffix first, since it also ends with 's'
            if args.minioi.endswith('ms'):
                self.onset.set_minioi_ms(float(args.minioi[:-2]))
            elif args.minioi.endswith('s'):
                self.onset.set_minioi_s(float(args.minioi[:-1]))
            else:
                self.onset.set_minioi(int(args.minioi))
        # compare against None rather than relying on truthiness, so that
        # an explicit silence threshold of 0 is not silently ignored
        if args.silence is not None:
            self.onset.set_silence(args.silence)
        super(process_onset, self).__init__(args)

    def __call__(self, block):
        """Run the onset detector on ``block`` and return its result."""
        return self.onset(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write the time of the last onset to stdout when ``res[0]`` is
        non-zero."""
        if res[0] != 0:
            outstr = self.time2string(self.onset.get_last(), samplerate)
            sys.stdout.write(outstr + '\n')
---|
236 | |
---|
class process_pitch(default_process):
    """Processor for the ``pitch`` sub-command, wrapping ``aubio.pitch``."""

    # command line arguments forwarded to the aubio.pitch constructor
    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        """Create the pitch detector and apply the optional settings.

        ``args.threshold`` is used as the tolerance and ``args.silence`` as
        the silence level; each is applied only when it is not None.
        """
        self.parse_options(args, self.valid_opts)
        self.pitch = aubio.pitch(**self.options)
        settings = ((self.pitch.set_tolerance, args.threshold),
                    (self.pitch.set_silence, args.silence))
        for setter, value in settings:
            if value is not None:
                setter(value)
        super(process_pitch, self).__init__(args)

    def __call__(self, block):
        """Run the pitch detector on ``block`` and return its result."""
        return self.pitch(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write the current time and ``res[0]`` to stdout on one line."""
        timestamp = self.time2string(frames_read, samplerate)
        value = "%.6f\n" % res[0]
        sys.stdout.write(timestamp + value)
---|
252 | |
---|
class process_beat(default_process):
    """Processor for the ``beat`` sub-command, wrapping ``aubio.tempo``."""

    # command line arguments forwarded to the aubio.tempo constructor
    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        """Create the tempo tracker from the valid entries of ``args``."""
        self.parse_options(args, self.valid_opts)
        self.tempo = aubio.tempo(**self.options)
        super(process_beat, self).__init__(args)

    def __call__(self, block):
        """Run the tempo tracker on ``block`` and return its result."""
        return self.tempo(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write the time of the last beat to stdout when ``res[0]`` is
        non-zero."""
        if res[0] == 0:
            return
        beat_time = self.time2string(self.tempo.get_last(), samplerate)
        sys.stdout.write(beat_time + '\n')
---|
265 | |
---|
class process_tempo(process_beat):
    """Processor for the ``tempo`` sub-command.

    Beat locations are collected while processing and a single bpm figure
    is written by ``flush``.
    """

    def __init__(self, args):
        """Set up the beat tracker and an empty list of beat locations."""
        super(process_tempo, self).__init__(args)
        self.beat_locations = []

    def repr_res(self, res, frames_read, samplerate):
        """Record the last beat location when ``res[0]`` is non-zero."""
        if res[0] != 0:
            self.beat_locations.append(self.tempo.get_last_s())

    def flush(self, frames_read, samplerate):
        """Write the mean bpm of the collected beats to stdout.

        Writes "unknown bpm" when fewer than two beats were found, and
        marks the figure as uncertain when fewer than ten were found.
        """
        import numpy as np
        n_beats = len(self.beat_locations)
        if n_beats < 2:
            outstr = "unknown bpm"
        else:
            # one bpm value per interval between consecutive beats
            mean_bpm = np.mean(60. / np.diff(self.beat_locations))
            template = "%.2f bpm (uncertain)" if n_beats < 10 else "%.2f bpm"
            outstr = template % mean_bpm
        sys.stdout.write(outstr + '\n')
---|
285 | |
---|
class process_notes(default_process):
    """Processor for the ``notes`` sub-command, wrapping ``aubio.notes``."""

    # command line arguments forwarded to the aubio.notes constructor
    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        """Create the notes detector from the valid entries of ``args``."""
        self.parse_options(args, self.valid_opts)
        self.notes = aubio.notes(**self.options)
        super(process_notes, self).__init__(args)

    def __call__(self, block):
        """Run the notes detector on ``block`` and return its result."""
        return self.notes(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write note events to stdout.

        A non-zero ``res[2]`` ends the current output line with the current
        time. A non-zero ``res[0]`` starts a new line with that value and
        the current time; that line is left open until it is ended by a
        later call or by ``flush``.
        """
        note_off, note_on = res[2] != 0, res[0] != 0
        if note_off:
            end_time = self.time2string(frames_read, samplerate)
            sys.stdout.write(end_time + '\n')
        if note_on:
            midi_field = "%f\t" % res[0]
            start_time = self.time2string(frames_read, samplerate)
            sys.stdout.write(midi_field + start_time)

    def flush(self, frames_read, samplerate):
        """Write the final time, closing any line left open."""
        sys.stdout.write(self.time2string(frames_read, samplerate) + '\n')
---|
306 | |
---|
class process_mfcc(default_process):
    """Processor for the ``mfcc`` sub-command.

    Each block goes through ``aubio.pvoc`` and the result is passed to
    ``aubio.mfcc``.
    """

    def __init__(self, args):
        """Create the phase vocoder and the mfcc objects from ``args``."""
        pvoc_keys = ['hop_size', 'buf_size']
        mfcc_keys = ['buf_size', 'n_filters', 'n_coeffs', 'samplerate']

        # the phase vocoder takes win_s/hop_s rather than buf_size/hop_size
        self.parse_options(args, pvoc_keys)
        self.remap_pvoc_options(self.options)
        self.pv = aubio.pvoc(**self.options)

        self.parse_options(args, mfcc_keys)
        self.mfcc = aubio.mfcc(**self.options)

        # remember all options
        self.parse_options(args, pvoc_keys + mfcc_keys)

        super(process_mfcc, self).__init__(args)

    def __call__(self, block):
        """Return the mfcc output computed for ``block``."""
        return self.mfcc(self.pv(block))

    def repr_res(self, res, frames_read, samplerate):
        """Write the current time and the values of ``res`` on one line."""
        coeffs = ' '.join("% 9.7f" % value for value in res.tolist())
        line = self.time2string(frames_read, samplerate) + coeffs
        sys.stdout.write(line + '\n')
---|
330 | |
---|
class process_melbands(default_process):
    """Processor for the ``melbands`` sub-command.

    Each block goes through ``aubio.pvoc`` and the result is passed to an
    ``aubio.filterbank`` set up with ``set_mel_coeffs_slaney``.
    """

    def __init__(self, args):
        """Create the phase vocoder and the filterbank from ``args``."""
        self.args = args

        # both objects take win_s/hop_s rather than buf_size/hop_size
        self.parse_options(args, ['hop_size', 'buf_size'])
        self.remap_pvoc_options(self.options)
        self.pv = aubio.pvoc(**self.options)

        self.parse_options(args, ['buf_size', 'n_filters'])
        self.remap_pvoc_options(self.options)
        self.filterbank = aubio.filterbank(**self.options)
        self.filterbank.set_mel_coeffs_slaney(args.samplerate)

        super(process_melbands, self).__init__(args)

    def __call__(self, block):
        """Return the filterbank output computed for ``block``."""
        return self.filterbank(self.pv(block))

    def repr_res(self, res, frames_read, samplerate):
        """Write the current time and the values of ``res`` on one line."""
        energies = ' '.join("% 9.7f" % value for value in res.tolist())
        line = self.time2string(frames_read, samplerate) + energies
        sys.stdout.write(line + '\n')
---|
353 | |
---|
def main():
    """Entry point of the aubio command line tool.

    Parses the command line, opens the source and feeds it block by block
    to the processor of the selected sub-command. Exits with status 0 after
    printing the version, and with status 1 when no command or no source
    is given, or on keyboard interrupt.
    """
    parser = aubio_parser()
    args = parser.parse_args()

    if getattr(args, 'show_version', False):
        sys.stdout.write('aubio version ' + aubio.version + '\n')
        sys.exit(0)
    if getattr(args, 'verbose', 0) > 3:
        sys.stderr.write('aubio version ' + aubio.version + '\n')

    if getattr(args, 'command', None) is None:
        # no command given, print help and return 1
        parser.print_help()
        sys.exit(1)
    if not (args.source_uri or args.source_uri2):
        sys.stderr.write("Error: a source is required\n")
        parser.print_help()
        sys.exit(1)
    if args.source_uri2 is not None:
        # the -i/--input option takes precedence over the positional source
        args.source_uri = args.source_uri2

    try:
        # open source_uri
        with aubio.source(args.source_uri, hop_size=args.hop_size,
                          samplerate=args.samplerate) as a_source:
            # always update args.samplerate to native samplerate, in case
            # source was opened with args.samplerate=0
            args.samplerate = a_source.samplerate
            # create the processor for this subcommand
            processor = args.process(args)
            frames_read = 0
            at_end = False
            while not at_end:
                # read new block from source
                samples, n_read = a_source()
                # execute processor on this block
                res = processor(samples)
                # print results for this block
                if args.verbose > 0:
                    processor.repr_res(res, frames_read, a_source.samplerate)
                # increment total number of frames read
                frames_read += n_read
                # a short read marks the end of the file
                at_end = n_read < a_source.hop_size
            # flush the processor if needed
            processor.flush(frames_read, a_source.samplerate)
            if args.verbose > 1:
                summary = ("read {:.2f}s"
                           " ({:d} samples in {:d} blocks of {:d})"
                           " from {:s} at {:d}Hz\n")
                duration = frames_read / float(a_source.samplerate)
                n_blocks = frames_read // a_source.hop_size + 1
                sys.stderr.write(summary.format(
                    duration,
                    frames_read,
                    n_blocks,
                    a_source.hop_size,
                    a_source.uri,
                    a_source.samplerate))
    except KeyboardInterrupt:
        sys.exit(1)
---|