/*
// audiotxt is an audio-to-text command line tool
// (c) jschulen-at-gmx.de 2002-2010, LGPL, comments are welcome
// compile: gcc ...
//   or diet gcc -o audiotxt -O2 -g audiotxt.c
// use:
//   arecord -f U8 -d 120 | ./audiotxt # 2m 8kHz 8bit 1MB/2m
//   arecord -f S16_LE -r32000 -d 60 | ./audiotxt # 1m 32kHz 16bit 4MB/1m
//   # above examples will probably generate soft-resamples from 48 kHz (lowQ)
//   # below try to get better sampling (check output for warnings)
//   arecord -v -f S16_LE -c2 -r96000 --disable-resample -D plughw:0,0
//   cat ~/a.wav | ./audiotxt
//
// - see README for more info
//
// whats good for:
//  - connect to your radio-transceiver and never miss a call
//  - watch for noises in your home if you are away
//  - watch your baby
//
// ToDo:
//  - gamma rays + trigger: out peaks min above trigger + max below trigger
//    first get max noise by extend min max-min-max-amplitude seq.
//  - detect call-codes (incl. DTMF decoding, CW decoding, 5Tone)
//  - only store after detected call-code
//  - code/decode SMS via radio-transceiver
//  - encode as MP3
//  - read from sound device (should be default, no delay)
//  - detect some spoken words
//  - srate=8000Hz(0.125ms) freq=50Hz(20ms)..4000Hz(0.25ms)
//  - 8ms(125Hz)
//  - parallel version, + more comments
// Num Freq. measured in Ear?
//  ca.FFT (sin/cos-table approx. -1 +1 , or +1 +1 0 -1 -1 0)
//  log. freq : 1/2=4000 1/4=2000 1/8=1000 1/16=500 1/32=250 Hz
//  mult. freq : 1/2=4000 1/3=2666 1/4=2000 1/5=... 1/128=6Hz
//  lin. freq : 128/256=4000 127/32=3969 ... 2/256=62Hz 1/256=31Hz
//  sin of 2500Hz?
//  +1 -1 +1 -1 = 8000/2=4000Hz
//  +1 -1 +1 -1 -1 +1
//  0 +1 0 -1 0 +1 0 = 8000/4=2000Hz
//  0 0 +1 0 0 -1 0 0 +1 0 0 = 8000/6Hz=1333Hz
//  pnm P5 -> highest-freq(1) .... lowestfreq(1/32) vs. time=down
//  phase = color? pnm.P6 ? or multichannel (max ampl. or phase relations)
//  multi-image-pnm? 256freq * 256*4ms
//
// remarks:
//  arecord -> fifo -> audiotxt (not possible, arecord does an rm)
//  arecord -> a.wav -> audiotxt -> a[00..15].wav
//  arecord -> stdout(wav) -> audiotxt.stdin -> wav
//  amixer ... ???
//  decoupling of mic/spk possible?
//
//   MIC --->--- MIXER --->--- A/D
//                 |
//                 v   <<<< can be switched of?
//                 |
//   D/A --->--- MIXER --->--- Speaker
//
// what does arecord?:
//  open("/dev/snd/pcmC0D0c", O_RDWR) = 4
//  ioctl(4, AGPIOC_INFO, 0xbffff004) = 0
//  old_mmap(NULL, 4096, PROT_READ, MAP_SHARED, 4, 0x80000000) = 0x40017000
//  old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, 4, 0x81000000) = 0x40018000
//  ioctl(4, AGPIOC_ACQUIRE, 0xbffff4c4) = 0
//  ioctl(4, 0x800c4151, 0xbffff3e0) = 0
//  write(3,"\200\201...",1000)
//
// 2012-02-22 better pulse classification (energy=sum_xi**2), two triggervals
// 2011-03-12 pulse detection (for a nuclear radiation detector)
// 2010-03-20 initial version, fft-debug mode, decoding DTMF + freq 8kHz
//
*/

/*
 * NOTE(review): the header names of every #include line were missing in the
 * copy this section was restored from (only the bare "#include" tokens and
 * their trailing comments survived). The active headers below are the ones
 * the visible code needs; the two commented-out names are inferred from
 * their trailing comments. Compare against a pristine copy if one exists.
 */
#include <stdio.h>        // printf, fprintf, perror, fopen, fread, fwrite
// #include <...>         // commented out in the original; header name lost
// #include <sys/mman.h>  // mmap()
// #include <fcntl.h>     // needed for open()
#include <stdlib.h>       // exit, malloc, free, abs
#include <string.h>       // assumed: string helpers in code not visible here - confirm
#include <unistd.h>       // sleep
#include <sys/time.h>     // gettimeofday, struct timeval
#include <sys/select.h>   // select
#include <math.h>         // sqrt, M_PI

#ifndef VDATE // usually defined via Makefile, CC option
#define VDATE "2012-02-19"
#endif

#define nfreq 250 // spectral density 4000Hz/nfreq=16Hz
#define fbase 16  // frequency resolution 16Hz

int fs=-1; /* sound-device if positive */

/* ToDo: write in cw_err.log or err_buf */
/*
 * Error helpers: E0/E1 print perror() plus a message tagged with the source
 * line and then sleep 2 s; F0/F1 do the same and exit(1).
 * xx must be a string literal (it is pasted into the format string);
 * the ...1 variants take one extra printf argument yy.
 * The expansions are plain { } blocks, kept exactly as they were because the
 * call sites are not visible from here.
 */
#define E1(xx,yy) { perror("perror");fprintf(stderr,"ERROR: L%d "xx"\n",__LINE__,yy);sleep(2); }
#define F1(xx,yy) { E1(xx,yy);exit(1); }
#define E0(xx) { perror("perror");fprintf(stderr,"ERROR: L%d "xx"\n",__LINE__);sleep(2); }
#define F0(xx) { E0(xx);exit(1); }

/*
 * ms - wall-clock time in milliseconds.
 *
 * Returns the low 32 bits of (seconds*1000 + microseconds/1000) taken from
 * gettimeofday(), converted to int. The absolute value therefore wraps (and
 * may be negative); it is only meaningful as a difference between two calls
 * that are close together.
 *
 * The sum is formed in unsigned 64-bit arithmetic so that the multiplication
 * cannot overflow a signed type when time_t is only 32 bits wide.
 */
int ms(){
  struct timeval tv;
  unsigned long long msec;

  gettimeofday(&tv,NULL);
  msec = (unsigned long long)tv.tv_sec*1000u
       + (unsigned long long)(tv.tv_usec/1000);
  return (int)(unsigned)msec; /* keep the low 32 bits, as before */
}

// block for some ms => store key-requests to buffer?
/*
 * my_wait - block the caller for about `ms` milliseconds.
 * Implemented as a select() call without descriptors, with the timeout
 * split into seconds and microseconds.
 */
void my_wait(int ms){
  struct timeval tv;
  tv.tv_sec  = ms/1000;
  tv.tv_usec = 1000*(ms%1000);
  select(0,NULL,NULL,NULL,&tv);
}

/*
 * help - print the usage text to stdout and terminate with exit status 0.
 */
void help(){
  printf(
    "audiotxt (audio-to-text "VDATE") listen for sound.\n"
    " (c) 2010-2011 Joerg Schulenburg, License: free beer or LGPL\n"
    "Options:\n"
    " -h this help\n"
    " -q be quite\n"
    " -v[number] be verbose (multiple -v increments the level)\n"
    " -dwav dump a00.wav file\n"
    " -draw dump reduced PCM data to a00.dat file\n"
    " -dfft dump fft-image to pnm file (not implemented)\n"
    " -t trigger value (0=auto, default=100, ToDo)\n"
    "Examples:\n"
    " arecord -f S16_LE -r 8000 -d 3600 | ./audiotxt -dwav # 1h\n"
    " cat a.wav | ./audiotxt -v\n"
    " oggdec -Q -o - sampleDTMF.ogg | audiotxt -v -dfft\n"
  );
  exit(0);
}

/*
 * getintx - decode a little-endian integer ("most significant byte is last").
 *   buf  first byte of the field
 *   len  number of bytes to combine (callers pass 2 or 4)
 * Returns the value as int. Bytes beyond sizeof(unsigned) are ignored so the
 * shift count always stays below the width of the accumulator, and the
 * accumulation is unsigned so a set top bit is not shifted into a sign bit.
 */
int getintx(unsigned char *buf, int len){
  unsigned val=0;
  int i;
  for (i=0; i<len && i<(int)sizeof(val); i++)
    val |= (unsigned)buf[i] << (8*i);
  return (int)val;
}

/*
 * ohex - print a label, the field as hex digits and its decimal value.
 *   descr  label, written verbatim (never interpreted as a format string)
 *   buf    first byte of the little-endian field
 *   len    number of bytes; with len==0 only the label is printed and no
 *          newline is emitted (main() uses this once for the data marker)
 * Returns the decoded value.
 *
 * NOTE(review): only the tail of this function -
 *   `0) printf(" = %9u\n",val); return(val); }` -
 * survived in the damaged copy; the signature follows the call sites
 * ohex("...: 0x", w44+N, len). The hex digits are written most significant
 * byte first so they read as one number after the "0x" in the labels; the
 * exact original digit layout could not be recovered - confirm against a
 * pristine copy.
 */
int ohex(const char *descr, unsigned char *buf, int len){
  int i, val=getintx(buf,len);
  fputs(descr,stdout);
  for (i=0; i<len; i++) printf("%02x",(unsigned)buf[len-1-i]);
  if (len>0) printf(" = %9u\n",(unsigned)val);
  return(val);
}

/*
 * oasc - print a label followed by `len` bytes as text and a newline.
 * Bytes in the range 32..127 are written as characters, everything else as
 * a \xNN escape. The label is written verbatim. Always returns 0.
 */
int oasc(const char *descr, unsigned char *buf, int len){
  int i;
  fputs(descr,stdout);
  for (i=0; i<len; i++) {
    if (((unsigned)buf[i])>31 && ((unsigned)buf[i])<128) printf("%c",buf[i]);
    else printf("\\x%02x",(unsigned)buf[i]);
  }
  printf("\n");
  return 0;
}

/* store 4byte or 2byte integer into buffer */
/*
 * store - write the low `len` bytes of `value` to buf, least significant
 * byte first (the inverse of getintx). Bytes beyond sizeof(unsigned) are set
 * to 0 so the shift count stays in range. Always returns 0.
 */
int store(unsigned char *buf, int len, int value) {
  unsigned v=(unsigned)value;
  int i;
  for (i=0; i<len; i++)
    buf[i] = (i<(int)sizeof(v)) ? (unsigned char)((v>>(8*i))&255u) : 0;
  return 0;
}

// the maximum frequency is srate/2 (half the sample rate)
// what can be done, to reduce CPU needs on demand?
//  - ifreq+=n would reduce the effective sample rate by n
//  - fmax=srate/2 (could be reduced by n down to 12kHz for speech)
//    absolute minimum is fmax=4kHz like poor ISDN phone (srate=8000)
//  - fmin = 4samples*srate/wsize ??? (for winsize=srate/32 (4ms) fmin=16Hz)
// does it make sense to make linear spectrum?
//  - frequency f between fmax and fmax/2 useful?
/*
 * NOTE(review): fft() and main() below are damaged. Their line breaks are
 * collapsed (so every "//" comment swallows the code that follows it) and
 * text that started at a '<' followed by a letter and ran to the next '>'
 * has been removed. Visible symptoms are "for (ifreq=0;ifreq0) printf(" and
 * "for (i1=0;i11) {". Lost as a result: the whole loop body of fft(), the
 * declarations and option parsing of main(), the header of the per-sample
 * loop and the code that decodes one sample into val. The text cannot
 * compile as it stands; restore it from a pristine copy. The notes below
 * describe only what is still visible.
 *
 * fft(freq, freq0, numfreq, dbuf, bsize, i1, wsize)
 *   freq     output array, freq[nfreq]
 *   freq0    base frequency; the caller passes 2*M_PI*fbase/srate
 *   numfreq  number of frequency bands
 *   dbuf     input sample buffer of bsize entries (caller passes 16*srate)
 *   i1       current sample index (start of window)
 *   wsize    window size in samples (caller passes srate/32)
 *
 * main(), visible part:
 *   - reads a 44 byte RIFF/WAVE header from stdin with fread() and exits
 *     with status 1 if fewer than 44 bytes arrive; with verbose>1 every
 *     header field is printed through oasc()/ohex().
 *   - takes nc, srate, align, bps and dsize from header offsets 22, 24, 32,
 *     34 and 40; exits if bps>24 or align>16.
 *   - allocates dbuf for 16*srate ints and zero-fills it. A failed malloc is
 *     only reported; later code checks dbuf before using it for the FFT.
 *   - nsamples = dsize / align (the comment says 0 means endless).
 *   - per sample: updates max0/max1, raises dtrigger to abs(val)*3/4 when
 *     that is larger, and adds abs(val) to max2.
 *   - first inner block: up-down-up period detector; after 8 periods, or
 *     when the period length changes, it prints a "D3 ... FREQ" line.
 *   - second inner block: pulse detector with states 0 (noise), 1 (between),
 *     2 (pulse) and 3 (full pulse). Reaching state 3 increments num_pulses,
 *     increments num_pulses2 when pmax>=strigger2, prints a "D2 ... PULSE"
 *     line at verbose>1 and updates min_pulse_dist.
 *   - every srate samples: max2 is reset and max1 and dtrigger are reduced
 *     by 1/16.
 *   - when numwrite==0 and val reaches the static or dynamic trigger, the
 *     dump file is opened with fopen(dump_file,"w") (the failure message
 *     says "Error popen"). mode_dump 1 writes a patched WAV header and sets
 *     numwrite to 5 s of samples, mode_dump 2 writes a text header and sets
 *     srate/8, mode_dump 4 writes a "P2" PNM header and sets srate/32.
 *   - while numwrite>0 and fp is set: mode 1 writes raw sample bytes, mode 2
 *     writes "time value" text lines, mode 4 calls fft() and prints the two
 *     strongest frequencies, decoding DTMF through "147*2580369#ABCD".
 *   - finally prints the "END" summary line (max0, strigger,
 *     min_pulse_dist, num_pulses, num_pulses2), frees dbuf and returns 0.
 *
 * NOTE(review): srate and align come straight from the input header and are
 * used as divisors (i1%srate, dsize / align); no zero check is visible.
 * NOTE(review): abs() is applied to a difference of doubles from freq[]
 * (abs(freq[max1]-freq[max2])); fabs() may be intended - confirm.
 */
void fft(double *freq, // output array freq[nfreq] double freq0, // base frequency 16Hz*2*Pi/srate (resolution) int numfreq, // num frequence bands int *dbuf, int bsize, // input buffer, buffer size = srate*16=16s int i1, int wsize) { // start_window, wsize=winsize=srate/32=4ms double f, rc, rs; int i4, ifreq, ival2; static double rv=127; for (ifreq=0;ifreq0) printf("audiotxt running (-h for help) ...\n"); if (verbose>1) printf("mode_dump= %d dumpfile= %s\n", mode_dump, dump_file); // let the user see what will happen in case of bad or no input if (verbose>0) printf("audiotxt waiting for RIFF.WAVE from stdin ...\n"); nread=fread(w44,1,44,stdin); if (nread<44) {fprintf(stderr,"Error: fread<44\n");exit(1);} if (verbose>1) { oasc(" 0(4) ChunkID=RIFF : ", w44+ 0,4); ohex(" 4(4) ChunkSize=36+X : 0x",w44+ 4,4); oasc(" 8(4) Format=WAVE : ", w44+ 8,4); oasc("12(4) SubChunk1ID=fmt : ", w44+12,4); ohex("16(4) SubChunk1Size=16: 0x",w44+16,4); ohex("20(2) AudioFormat=1 : 0x",w44+20,2); ohex("22(2) NumChannels=1 : 0x",w44+22,2); ohex("24(4) SampleRate=8000 : 0x",w44+24,4); ohex("28(4) ByteRate =8000 : 0x",w44+28,4); ohex("32(2) BlockAlign=1 : 0x",w44+32,2); ohex("34(2) BitsPerSample=8 : 0x",w44+34,2); oasc("36(4) SubChunk2ID=data: ", w44+36,4); ohex("40(4) SubChunk2Size : 0x",w44+40,4); /* align*srate*time=brate*time */ ohex("44(2) AudioData : ... 
",w44,0); } nc= getintx(w44+22,2); // num channels, mono=1 stereo=2 srate= getintx(w44+24,4); // samples/s align= getintx(w44+32,2); // in bytes = (nc*bps+fillbits)/8 bps= getintx(w44+34,2); // BitsPerSample (per channel) dsize= getintx(w44+40,4); // align is the size of a single multi channel sample in bytes // and must fit into buf[16] (8*int16 samples or 2*int64 samples) if (bps >24) {fprintf(stderr,"Error: bps>24\n"); exit(1);} if (align>16) {fprintf(stderr,"Error: BlockAlign>16\n");exit(1);} dbuf=(int *)malloc(16*srate*sizeof(int)); /* 16s data buffer */ if (!dbuf) {fprintf(stderr,"Error: malloc failed\n");} // set buffer to 0 to allow defined FFT from the beginning if (dbuf) for (i1=0;i1<16*srate;i1++) dbuf[i1]=0; buf[0]=buf[1]=buf[2]=buf[3]=0; max1=max2=0; nsamples = dsize / align; /* number of samples, 0=endless */ // read in data sample by sample (multiple channel samples) for (i1=0;i11) { if (i1==0) printf("\n"); if (i1< 10) { printf(" sample= %8d hex=", i1); for(i2=0;i2=nc*bps) for (i2=0;i21 // ToDo: diff val-old_val??? // now we try to compute interesting events de:Ereignisse if (abs(val) >max0 ) max0= abs(val); if (abs(val) >max1 ) max1= abs(val); if (abs(val)*3/4>dtrigger) { dtrigger= abs(val)*3/4; if (verbose>2 && !strigger) printf(" sample= %8d dtrigger= %5d\n", i1, dtrigger); } max2 += abs(val); // sum all, ToDo de: gleitender Mittelwert??? // ToDo?: meanFreq.? wmin1-wmax1-wmin2-wmax2 (one wave?) // - von hoechster Frequenz sample/2 a[i]-a[i-1] bis buffersize bzw. i1 // detect periodic signal and its freq // reset after 4*min_period samples have below half amplitude // ToDo: simulate schmidt-trigger! 
show freq-changes + len pulses { static int last_up=0, last_down=0, max_val=16, last_up2=0, first_up=0 /* first wave */, num_periods=0; int trigg=(strigger)?strigger:dtrigger; // ToDo or last_step>trigger if (abs(val) > trigg*8/16) { if (val > 0 && last_up ==0 ) { last_up2=last_up; last_up=i1;} if (val < 0 && last_down==0 && last_up) last_down=i1; if (val > 0 && last_up && last_down) { // have up-down-up-sequence if (first_up==0) first_up=last_up; num_periods++; // time/[s] if (verbose>2 && num_periods<32) // lot of output printf("D3 t= %7.3f up-down-up= %5d %5d val= %4d nump= %4d f= %4d\n", i1*1./srate, i1-last_up, i1-last_down, val, num_periods, num_periods*srate/(i1-first_up)); // if num_periods==8 if (abs((i1 - last_up) - (last_up - last_up2)) > 1+(i1-last_up2)/8 || num_periods==8){ // time/[s] if (num_periods>7) printf("D3 t= %7.3f trigg= %5d len= %6d * %4d FREQ %5d" " +- %4d\n", i1*1./srate, trigg, num_periods, (last_up-first_up)/(num_periods-1), (int)((num_periods-1)*(long long)srate/ (last_up-first_up) ), (int)((num_periods-1)*(long long)srate/(2*(last_up-first_up)-1)) - (int)((num_periods-1)*(long long)srate/(2*(last_up-first_up)+1))); if (abs((i1 - last_up) - (last_up - last_up2)) > 1+(i1-last_up2)/8) { num_periods=1; first_up=last_up; } } last_down=0;last_up2=last_up;last_up=i1; // search new wave } } } // max_freq // Pulse Detection Algorithm (PDA) // detect pulses, besser diff val > 4* maxdiff last 8 samples // no slow pulses (step>1?) // state=0: 8++ low diffs (noise) // state=1: 0..7 between diff // state=2: 1++ * high diff ... until 3* low<=trigg/2 // state=3: full pulse detected, changes to state=0 // use buffer, detect trigg + w=width_pulse + 4w*noise before and after? 
// ToDo: output also pulse volume for better gamma pulse histograms { static int old_state=0, state=0, pstart=0, pmax=0, // pulse maximum (above trigger) per pulse bmax=0, // max between trigg/2 and trigg per pulse nmax=0, // noise maximum (below trigger) per pulse last_val=0, // last value last_val2=0, // 2nd last value vdiff, // max(abs(val-last_val),abs(val-last_val2)) maxpvol=0, maxpabs=0, pvol=0, pabs=0, // peak volume 2012-02-21 ringbuf[16]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, ringbp=0, // round robin pointer to ringbuffer samples[5]={0,0,0,0,0}; int trigg=(strigger)?strigger:dtrigger/2; ringbuf[ringbp]=val; // 16 value ringbuffer pvol+= val*val; // add new value pabs+= abs(val); ringbp=(ringbp+1)&15; pvol-= ringbuf[ringbp]*ringbuf[ringbp]; // subtract old value pabs-= abs(ringbuf[ringbp]); if (maxpvol vdiff) vdiff= abs(val-last_val2); if (vdiff>=abs(trigg/2) && vdiff< abs(trigg) && vdiff> abs(bmax)) bmax=vdiff; // max diff>trigg/2 if (vdiff< abs(trigg/2) && vdiff> abs(nmax)) nmax=vdiff; // max diff=abs(trigg) && vdiff> abs(pmax)) pmax=vdiff; // max diff>trigg if (state==0) { // noise if (vdiff< abs(trigg)/2) { if (samples[0]<63) samples[0]++; } // noise before pulse if (vdiff>=abs(trigg)/2) { if (samples[0]>7) { state=1; samples[1]=0; } // end noise else { samples[0]=0; } // end noise } maxpvol=maxpabs=0; // reset } if (state==1) { // between noise and pulse if (vdiff< abs(trigg)) { samples[1]++; // between if (samples[1]>7) { state=0; samples[0]=0; } // back to noise } if (vdiff>=abs(trigg)) { pstart=i1; state=2; samples[3]=samples[2]=0; // start pulse } } if (state==2) { // pulse (decay) if (vdiff>=abs(trigg)) { samples[2]+=samples[3]+1; samples[3]=0; } if (vdiff6) { state=3; } // end-of-Pulse back to noise } } // DEBUG verbose=3 and state change // DEBUG verbode=4 and not noise // time/[s] if ((state!=old_state && verbose>2) || (state>0 && verbose>3)) { printf("D3 t= %7.3f P%d %3d max= %5d %5d %6d %5d %5d plen= %3d %2d %2d %2d\n", i1*1./srate, state, 
num_pulses, nmax, bmax, pmax, 0, 0, samples[0], samples[1], samples[2], samples[3]); old_state=state; } if (state==3) { // full pulse state 1+2+3 num_pulses++; if (pmax>=strigger2) num_pulses2++; // hitting higher triggervalue // DEBUG$level time/[s] PULSE ... if (verbose>1) // ToDo: stop output on highDensePulses printf("D2 t= %7.3f PULSE %3d max= %5d %5d %6d %5d %5d plen= %3d %2d %2d\n", pstart*1./srate, num_pulses, nmax, bmax, pmax, (int)sqrt(1.*maxpvol), maxpabs, samples[0], samples[1], samples[2]); // nmax = maxvdifftrigg // ToDo: also out maxval (may have better statistical characteristics) // or array of pulse (sum square val of max of last 10 pixel) // sum of square of deviations if (last_pulse && (min_pulse_dist>pstart-last_pulse || !min_pulse_dist)) min_pulse_dist=pstart-last_pulse; last_pulse=pstart; // we need this to compute the next gap pmax=bmax=nmax=0; old_state=state=0; samples[3]=samples[2]=samples[1]=0; samples[0]=samples[3]; } // OK last_val2= last_val; last_val= val; } // ToDo: if mean(100ms) is changing or noise (max/4) or noise=min(mean100ms) if (i1%srate==0) { // every second (sample_rate) static int old_max=0; if (i1>0 && i1%(srate*10)==0 && max1!=old_max) { // reduce output (only changes) if (verbose>2) printf("D3 t= %7.3f sample= %8d max=%8u mean=%8u\n", i1*1./srate, i1, max1, max2/srate); fflush(stdout); old_max=max1-max1/16; // output only new maximums } max2=0; max1-=max1/16; // de:Daempfung for maximum dtrigger-=dtrigger/16; // decay } // start fft (and optional fft-dump) // start dumpfile after triggering (ToDo: subfunction) // first check if we have to initialize the file if (numwrite==0 && ((strigger>0 && val>=strigger) // static_trigger || (strigger<1 && val>=dtrigger))) { // dynamic_trigger // ToDo: start fft? (general 4ms units)? 
if (mode_dump) { fp=fopen(dump_file,"w"); if (!fp) {fprintf(stderr,"Error popen\n");}; if (mode_dump==1) { // write header numwrite=srate*5; /* next 5s */ store(w44+ 4, 4, 36+numwrite*align); /* new chunksize */ store(w44+40, 4, numwrite*align); /* new data-subchunksize */ store(w44+22, 2, 1); /* new numchannels */ if (fp) if (fwrite(w44,1,44,fp)!=44) fprintf(stderr,"Error fwrite\n"); } else if (mode_dump==2 && fp) { fprintf(fp,"# time raw-values\n"); numwrite=srate/8; /* next 0.125s */ } if (mode_dump==4 && fp) { fprintf(fp,"P2\n#FFT x=(0,%.0f,..%.0fHz) y=time(%.1fms)\n" // 16Hz..4000Hz "%d 256\n255\n", (double)fbase, (double)fbase*nfreq, 32000./srate, nfreq); numwrite=srate/32; /* next 8000/32=250 samples, 4ms */ } } } if (numwrite>0 && fp) { /* write next numwrite samples to *fp */ if (mode_dump==1) { /* write next numwrite samples to *fp */ i3=0; if (align>bps/8) if (fwrite(&i3 ,align-bps/8,1,fp)!=1) { fprintf(stderr,"Error fwrite\n");numwrite=1; }; /* ToDo: Format Korrektur to LE ... 
*/ if (fwrite(&val, bps/8,1,fp)!=1) { fprintf(stderr,"Error fwrite\n");numwrite=1; }; } if (mode_dump==2) { /* write next numwrite samples to *fp */ fprintf(fp," %9.6f % 6d\n", i1*1./srate, val); } // ToDo: decouple FFT from fft-dump // (powersave: optional trigger fft by minimum amplitude) if (mode_dump==4 && (numwrite%(srate/32)==1) && dbuf) { /* one line per 32 samples = 3ms */ int i4, max1, max2; static int omax1=0, omax2=0, // frequency of maximum intensity imax1=0, imax2=0, // maximum intensity (old) imin1=255, imin2=255, // minimum intensity tstart=0; static double freq[nfreq]; // out[] 2*M_PI*16Hz buf[i1] bufsize winsize fft(freq, 2*M_PI*fbase/srate, nfreq, dbuf, 16*srate, i1, srate/32); // "01"=941+1336Hz,697+1209Hz = (59+84,44+76)*16Hz // 8000/s: 4.5s (wav) = 1.6s (600MHz) // 16000/s: 4.5s (wav) = 2.7s (600MHz) for (i4=0;i45) max1=i4; if (freq[max2]5) max2=i4; } // at the end the 2 frequencies of DTMF may change relative amplitude if (abs(freq[max1]-freq[max2])*8<(freq[max1]+freq[max2]) // diff<25% && abs(max2-omax1)<6 && abs(max1-omax2)<6) { int i=max1;max1=max2;max2=i; } if ((freq[max2]-freq[max1])*8>(freq[max1]+freq[max2])) // diff>25% { int i=max1;max1=max2;max2=i; } // allow small drift for growing maximum (happens at the beginning) if (imax1127) {imax1=freq[max1]; omax1=max1;} if (imax2127) {imax2=freq[max2]; omax2=max2;} // twist > 4dB = 2.5, 8dB = 6.3 (nonDTMF, Morse) // if (freq[max2]*4=nsamples // EOF || (abs(max1-omax1)>5 && freq[max1]>127) // new freq1 || (abs(max2-omax2)>5 && freq[max2]>127) // new freq2 || ( freq[max1]<80 && imin1>=80) // new silence? || ( freq[max1]>127 && imax1<128) // new tone? 
) { int dtmf1, dtmf2, dtmf=0; const char dtmfdecode[17]="147*2580369#ABCD"; printf(" freq/Hz= %4d %4d" " a= %3d - %3d %3d - %3d" // amplitude " t/ms= %5.0f", omax1*(int)fbase, omax2*(int)fbase, imin1, imax1, imin2, imax2, (i1-tstart)*1000./srate); // sort low and high frequency for better decoding if (omax10 && imax1>127) printf(" DTMF %c", (char) dtmf); if (imax1<80 && imin1==0) printf(" silence"); else if (imax1<128 && imin1<80) printf(" noise"); // morse? if (imax1>127 && imax1>5*(imax2+1)) printf(" sinus%.0f", omax1*1.*fbase); printf("\n"); // replace old tone by new tone imin1=imax1=freq[max1]; omax1=max1; imin2=imax2=freq[max2]; omax2=max2; if (imax1==0) omax1=0; // silence? if (imax2==0) omax2=0; tstart=i1; } if (imin1>freq[max1]) imin1=freq[max1]; if (imax1freq[max2]) imin2=freq[max2]; if (imax20 } // i1=0 ... nsamples-1 (main loop, read each sample) // final output of maximum // v2011: END max=%8u trigger=%8u pulses= %4d # pulses=$7 // v2012: END ... $3max $4trigg ... $6mindist $7np $8np2 printf(" END maxV,trig %5u %5u pulse.minDt,num,n2 %7d %4d %4d\n", max0, strigger, min_pulse_dist, num_pulses, num_pulses2); // strigger= static trigger value // max0= abs max (old: max1= dyn.maximum (-1/16)) // pulsdist=min_pulse_dist*1./srate=[s] Bsp=600s*48000/s=28.8e6 // num_pulses (above trigger) // num_pulses2 (above strigger2) used for Geiger counter with 2 tubes free(dbuf); /* 16s data history */ return(0); }