| Adds silence detection and lays the groundwork for per-call reanalysis - warvox… | |
| Log | |
| Files | |
| Refs | |
| README | |
| --- | |
| commit ebfa9cc7945f832c08fdcf59f33c2271c1a550b9 | |
| parent 772b9eccbebf473d60ffe78cd613ff0d2daacf37 | |
| Author: HD Moore <[email protected]> | |
| Date: Thu, 5 Mar 2009 15:15:39 +0000 | |
| Adds silence detection and lays the groundwork for per-call reanalysis | |
| Diffstat: | |
| M lib/warvox/jobs/analysis.rb | 442 ++++++++++++++++++-----------… | |
| 1 file changed, 251 insertions(+), 191 deletions(-) | |
| --- | |
| diff --git a/lib/warvox/jobs/analysis.rb b/lib/warvox/jobs/analysis.rb | |
| @@ -34,7 +34,7 @@ class Analysis < Base | |
| model = get_job | |
| model.processed = true | |
| - model.save | |
| + db_save(model) | |
| stop() | |
| @@ -52,206 +52,266 @@ class Analysis < Base | |
| todo.each do |r| | |
| next if r.processed | |
| next if not r.completed | |
| - next if r.busy | |
| - next if not r.rawfile | |
| - next if not File.exist?(r.rawfile) | |
| - | |
| - bname = r.rawfile.gsub(/\..*/, '') | |
| - num = r.number | |
| - | |
| - # | |
| - # Create the signature database | |
| - # | |
| - | |
| - raw = WarVOX::Audio::Raw.from_file(r.rawfile) | |
| - flow = raw.to_flow | |
| - fd = File.new("#{bname}.sig", "wb") | |
| - fd.write flow | |
| - fd.close | |
| - | |
| - # Save the signature data | |
| - r.sig_data = flow | |
| - | |
| - # | |
| - # Create a raw decompressed file | |
| - # | |
| - | |
| - # Decompress the audio file | |
| - rawfile = Tempfile.new("rawfile") | |
| - datfile = Tempfile.new("datfile") … | |
| - | |
| - # Data files for audio processing and signal graph | |
| - cnt = 0 | |
| - rawfile.write(raw.samples.pack('v*')) | |
| - datfile.write(raw.samples.map{|val| cnt +=1; "#{cnt/80… | |
| - rawfile.flush | |
| - datfile.flush | |
| - | |
| - # Data files for spectrum plotting | |
| - frefile = Tempfile.new("frefile") | |
| - | |
| - # Perform a DFT on the samples | |
| - res = KissFFT.fftr(8192, 8000, 1, raw.samples) | |
| - | |
| - # Calculate the peak frequencies for the sample | |
| - maxf = 0 | |
| - maxp = 0 | |
| - tones = {} | |
| - res.each do |x| | |
| - rank = x.sort{|a,b| a[1].to_i <=> b[1].to_i }.… | |
| - rank[0..10].each do |t| | |
| - f = t[0].round | |
| - p = t[1].round | |
| - next if f == 0 | |
| - next if p < 1 | |
| - tones[ f ] ||= [] | |
| - tones[ f ] << t | |
| - if(t[1] > maxp) | |
| - maxf = t[0] | |
| - maxp = t[1] | |
| - end | |
| + next if r.busy | |
| + analyze_call(r) | |
| + end | |
| + end | |
| + | |
| + def analyze_call(r) | |
| + | |
| + return if not r.rawfile | |
| + return if not File.exist?(r.rawfile) | |
| + | |
| + bname = r.rawfile.gsub(/\..*/, '') | |
| + num = r.number | |
| + | |
| + # | |
| + # Create the signature database | |
| + # | |
| + raw = WarVOX::Audio::Raw.from_file(r.rawfile) | |
| + flow = raw.to_flow | |
| + fd = File.new("#{bname}.sig", "wb") | |
| + fd.write "#{num} #{flow}\n" | |
| + fd.close | |
| + | |
| + # Save the signature data | |
| + r.sig_data = flow | |
| + | |
| + # | |
| + # Create a raw decompressed file | |
| + # | |
| + | |
| + # Decompress the audio file | |
| + rawfile = Tempfile.new("rawfile") | |
| + datfile = Tempfile.new("datfile") | |
| + | |
| + # Data files for audio processing and signal graph | |
| + cnt = 0 | |
| + rawfile.write(raw.samples.pack('v*')) | |
| + datfile.write(raw.samples.map{|val| cnt +=1; "#{cnt/8000.0} #{… | |
| + rawfile.flush | |
| + datfile.flush | |
| + | |
| + # Data files for spectrum plotting | |
| + frefile = Tempfile.new("frefile") | |
| + | |
| + # Perform a DFT on the samples | |
| + res = KissFFT.fftr(8192, 8000, 1, raw.samples) | |
| + | |
| + # Calculate the peak frequencies for the sample | |
| + maxf = 0 | |
| + maxp = 0 | |
| + tones = {} | |
| + res.each do |x| | |
| + rank = x.sort{|a,b| a[1].to_i <=> b[1].to_i }.reverse | |
| + rank[0..10].each do |t| | |
| + f = t[0].round | |
| + p = t[1].round | |
| + next if f == 0 | |
| + next if p < 1 | |
| + tones[ f ] ||= [] | |
| + tones[ f ] << t | |
| + if(t[1] > maxp) | |
| + maxf = t[0] | |
| + maxp = t[1] | |
| end | |
| end | |
| - | |
| - # Save the peak frequency | |
| - r.peak_freq = maxf | |
| - | |
| - # Calculate average frequency and peaks over time | |
| - avg = {} | |
| - pks = [] | |
| - res.each do |slot| | |
| - pks << slot.sort{|a,b| a[1] <=> b[1] }.reverse… | |
| - slot.each do |freq| | |
| - avg[ freq[0] ] ||= 0 | |
| - avg[ freq[0] ] += freq[1] | |
| - end | |
| + end | |
| + | |
| + # Save the peak frequency | |
| + r.peak_freq = maxf | |
| + | |
| + # Calculate average frequency and peaks over time | |
| + avg = {} | |
| + pks = [] | |
| + res.each do |slot| | |
| + pks << slot.sort{|a,b| a[1] <=> b[1] }.reverse[0] | |
| + slot.each do |freq| | |
| + avg[ freq[0] ] ||= 0 | |
| + avg[ freq[0] ] += freq[1] | |
| end | |
| - | |
| - # Save the peak frequencies over time | |
| - r.peak_freq_data = pks.map{|f| "#{f[0]}-#{f[1]}" }.joi… | |
| - | |
| - # Generate the frequency file | |
| - avg.keys.sort.each do |k| | |
| - avg[k] = avg[k] / res.length | |
| - frefile.write("#{k} #{avg[k]}\n") | |
| + end | |
| + | |
| + # Save the peak frequencies over time | |
| + r.peak_freq_data = pks.map{|f| "#{f[0]}-#{f[1]}" }.join(" ") | |
| + | |
| + # Generate the frequency file | |
| + avg.keys.sort.each do |k| | |
| + avg[k] = avg[k] / res.length | |
| + frefile.write("#{k} #{avg[k]}\n") | |
| + end | |
| + frefile.flush | |
| + | |
| + # | |
| + # XXX: store significant frequencies somewhere (tones) | |
| + # | |
| + | |
| + # Make a guess as to what kind of phone number we found | |
| + line_type = nil | |
| + while(not line_type) | |
| + | |
| + # Look for silence | |
| + if(flow.split(/\s+/).grep(/^H,/).length == 0) | |
| + line_type = 'silence' | |
| + break | |
| end | |
| - frefile.flush | |
| - | |
| - # | |
| - # XXX: store significant frequencies somewhere (tones) | |
| - # | |
| - | |
| - # Make a guess as to what kind of phone number we found | |
| - line_type = nil | |
| - while(not line_type) | |
| - | |
| - # Look for modems by detecting 2250hz tones | |
| - f_2250 = 0 | |
| - pks.each{|f| f_2250 += 1 if(f[0] > 2240 and f[… | |
| - if(f_2250 > 2) | |
| - line_type = 'modem' | |
| - break | |
| - end | |
| - | |
| - # Look for the 1000hz voicemail BEEP | |
| - if(r.peak_freq > 990 and r.peak_freq < 1010) | |
| - line_type = 'voicemail' | |
| - break | |
| - end | |
| - # Most faxes have at least two of the followin… | |
| - f_1625 = f_1660 = f_1825 = f_2100 = false | |
| - pks.each do |f| | |
| - # $stderr.puts "#{r.number} #{f.inspec… | |
| - f_1625 = true if(f[0] > 1620 and f[0] … | |
| - f_1660 = true if(f[0] > 1655 and f[0] … | |
| - f_1825 = true if(f[0] > 1820 and f[0] … | |
| - f_2100 = true if(f[0] > 2090 and f[0] … | |
| - end | |
| - if([ f_1625, f_1660, f_1825, f_2100 ].grep(tru… | |
| - line_type = 'fax' | |
| - break | |
| - end | |
| - | |
| - # Dial tone detection | |
| - f_440 = false | |
| - f_350 = false | |
| - pks.each do |f| | |
| - f_440 = true if(f[0] > 435 and f[0] < … | |
| - f_345 = true if(f[0] > 345 and f[0] < … | |
| - end | |
| - if(f_440 and f_350) | |
| - line_type = 'dialtone' | |
| - break | |
| - end | |
| - | |
| - # Detect humans based on long pauses | |
| - | |
| - # Default to voice | |
| - line_type = 'voice' | |
| + # Look for modems by detecting 2250hz tones | |
| + f_2250 = 0 | |
| + pks.each{|f| f_2250 += 1 if(f[0] > 2240 and f[0] < 226… | |
| + if(f_2250 > 2) | |
| + line_type = 'modem' | |
| + break | |
| + end | |
| + | |
| + # Look for the 1000hz voicemail BEEP | |
| + if(r.peak_freq > 990 and r.peak_freq < 1010) | |
| + line_type = 'voicemail' | |
| + break | |
| + end | |
| + | |
| + # Most faxes have at least two of the following tones | |
| + f_1625 = f_1660 = f_1825 = f_2100 = false | |
| + pks.each do |f| | |
| + # $stderr.puts "#{r.number} #{f.inspect}" | |
| + f_1625 = true if(f[0] > 1620 and f[0] < 1630) | |
| + f_1660 = true if(f[0] > 1655 and f[0] < 1665) | |
| + f_1825 = true if(f[0] > 1820 and f[0] < 1830) | |
| + f_2100 = true if(f[0] > 2090 and f[0] < 2110) … | |
| end | |
| + if([ f_1625, f_1660, f_1825, f_2100 ].grep(true).lengt… | |
| + line_type = 'fax' | |
| + break | |
| + end | |
| - # Save the guessed line type | |
| - r.line_type = line_type | |
| - | |
| - # Plot samples to a graph | |
| - plotter = Tempfile.new("gnuplot") | |
| - | |
| - plotter.puts("set ylabel \"Signal\"") | |
| - plotter.puts("set xlabel \"Seconds\"") | |
| - plotter.puts("set terminal png medium size 640,480 tra… | |
| - plotter.puts("set output \"#{bname}_big.png\"") | |
| - plotter.puts("plot \"#{datfile.path}\" using 1:2 title… | |
| - plotter.puts("set output \"#{bname}_big_dots.png\"") | |
| - plotter.puts("plot \"#{datfile.path}\" using 1:2 title… | |
| - | |
| - plotter.puts("set terminal png medium size 640,480 tra… | |
| - plotter.puts("set ylabel \"Power\"") | |
| - plotter.puts("set xlabel \"Frequency\"") | |
| - plotter.puts("set output \"#{bname}_freq_big.png\"") | |
| - plotter.puts("plot \"#{frefile.path}\" using 1:2 title… | |
| - | |
| - plotter.puts("set ylabel \"Signal\"") | |
| - plotter.puts("set xlabel \"Seconds\"") | |
| - plotter.puts("set terminal png small size 160,120 tran… | |
| - plotter.puts("set format x ''") | |
| - plotter.puts("set format y ''") | |
| - plotter.puts("set output \"#{bname}.png\"") | |
| - plotter.puts("plot \"#{datfile.path}\" using 1:2 notit… | |
| - | |
| - plotter.puts("set ylabel \"Power\"") | |
| - plotter.puts("set xlabel \"Frequency\"") … | |
| - plotter.puts("set terminal png small size 160,120 tran… | |
| - plotter.puts("set format x ''") | |
| - plotter.puts("set format y ''") | |
| - plotter.puts("set output \"#{bname}_freq.png\"") | |
| - plotter.puts("plot \"#{frefile.path}\" using 1:2 notit… | |
| - plotter.flush | |
| - | |
| - system("gnuplot #{plotter.path}") | |
| - File.unlink(plotter.path) | |
| - File.unlink(datfile.path) | |
| - File.unlink(frefile.path) | |
| - plotter.close | |
| - datfile.close | |
| - frefile.path | |
| - | |
| - # Generate a MP3 audio file | |
| - system("sox -s -w -r 8000 -t raw -c 1 #{rawfile.path} … | |
| - system("lame #{bname}.wav #{bname}.mp3 >/dev/null 2>&1… | |
| - File.unlink("#{bname}.wav") | |
| - File.unlink(rawfile.path) | |
| - rawfile.close | |
| - | |
| - # Save the changes | |
| - r.processed = true | |
| - r.processed_at = Time.now | |
| - db_save(r) | |
| - | |
| - clear_zombies() | |
| + # Dial tone detection | |
| + f_440 = false | |
| + f_350 = false | |
| + pks.each do |f| | |
| + f_440 = true if(f[0] > 435 and f[0] < 445) | |
| + f_345 = true if(f[0] > 345 and f[0] < 355) | |
| + end | |
| + if(f_440 and f_350) | |
| + line_type = 'dialtone' | |
| + break | |
| + end | |
| + | |
| + # Detect humans based on long pauses | |
| + | |
| + # Default to voice | |
| + line_type = 'voice' | |
| end | |
| + | |
| + # Save the guessed line type | |
| + r.line_type = line_type | |
| + | |
| + # Plot samples to a graph | |
| + plotter = Tempfile.new("gnuplot") | |
| + | |
| + plotter.puts("set ylabel \"Signal\"") | |
| + plotter.puts("set xlabel \"Seconds\"") | |
| + plotter.puts("set terminal png medium size 640,480 transparent… | |
| + plotter.puts("set output \"#{bname}_big.png\"") | |
| + plotter.puts("plot \"#{datfile.path}\" using 1:2 title \"#{num… | |
| + plotter.puts("set output \"#{bname}_big_dots.png\"") | |
| + plotter.puts("plot \"#{datfile.path}\" using 1:2 title \"#{num… | |
| + | |
| + plotter.puts("set terminal png medium size 640,480 transparent… | |
| + plotter.puts("set ylabel \"Power\"") | |
| + plotter.puts("set xlabel \"Frequency\"") | |
| + plotter.puts("set output \"#{bname}_freq_big.png\"") | |
| + plotter.puts("plot \"#{frefile.path}\" using 1:2 title \"#{num… | |
| + | |
| + plotter.puts("set ylabel \"Signal\"") | |
| + plotter.puts("set xlabel \"Seconds\"") | |
| + plotter.puts("set terminal png small size 160,120 transparent") | |
| + plotter.puts("set format x ''") | |
| + plotter.puts("set format y ''") | |
| + plotter.puts("set output \"#{bname}.png\"") | |
| + plotter.puts("plot \"#{datfile.path}\" using 1:2 notitle with … | |
| + | |
| + plotter.puts("set ylabel \"Power\"") | |
| + plotter.puts("set xlabel \"Frequency\"") … | |
| + plotter.puts("set terminal png small size 160,120 transparent") | |
| + plotter.puts("set format x ''") | |
| + plotter.puts("set format y ''") | |
| + plotter.puts("set output \"#{bname}_freq.png\"") | |
| + plotter.puts("plot \"#{frefile.path}\" using 1:2 notitle with … | |
| + plotter.flush | |
| + | |
| + system("gnuplot #{plotter.path}") | |
| + File.unlink(plotter.path) | |
| + File.unlink(datfile.path) | |
| + File.unlink(frefile.path) | |
| + plotter.close | |
| + datfile.close | |
| + frefile.path | |
| + | |
| + # Generate a MP3 audio file | |
| + system("sox -s -w -r 8000 -t raw -c 1 #{rawfile.path} #{bname}… | |
| + system("lame #{bname}.wav #{bname}.mp3 >/dev/null 2>&1") | |
| + File.unlink("#{bname}.wav") | |
| + File.unlink(rawfile.path) | |
| + rawfile.close | |
| + | |
| + # Save the changes | |
| + r.processed = true | |
| + r.processed_at = Time.now | |
| + db_save(r) | |
| + | |
| + clear_zombies() | |
| end | |
| +end | |
| + | |
| +class CallAnalysis < Analysis | |
| + | |
| + require 'fileutils' | |
| + | |
| + @@kissfft_loaded = false | |
| + begin | |
| + require 'kissfft' | |
| + @@kissfft_loaded = true | |
| + rescue ::LoadError | |
| + end | |
| + | |
| + def type | |
| + 'call_analysis' | |
| + end | |
| + | |
| + def initialize(result_id) | |
| + @name = result_id | |
| + if(not @@kissfft_loaded) | |
| + raise RuntimeError, "The KissFFT module is not availab… | |
| + end | |
| + end | |
| + | |
| + def get_job | |
| + ::DialResult.find(@name) | |
| + end | |
| + | |
| + def start | |
| + @status = 'active' | |
| + | |
| + begin | |
| + start_processing() | |
| + stop() | |
| + rescue ::Exception => e | |
| + $stderr.puts "Exception in the job queue: #{e.class} #… | |
| + end | |
| + end | |
| + | |
| + def stop | |
| + @status = 'completed' | |
| + end | |
| + | |
| + def start_processing | |
| + r = get_job() | |
| + return if not r.completed | |
| + return if r.busy | |
| + analyze_call(r) | |
| + end | |
| end | |
| + | |
| end | |
| end |