Create a waveform of the full track with Web Audio API

Question 1

Create a waveform of the full track with Web Audio API

javascript html audio canvas web-audio-api

cocco · Feb 27, 2014 · Viewed 19.6k times · Source

Answer

Answer

Ok, so what i would do is to load the sound with an XMLHttpRequest, then decode it using webaudio, then display it 'carefully' to have the colors you are searching for.

I just made a quick version, copy-pasting from various of my projects, it is quite working, as you might see with this picture :

enter image description here

The issue is that it is slow as hell. To have (more) decent speed, you'll have to do some computation to reduce the number of lines to draw on the canvas, because at 441000 Hz, you very quickly get too many lines to draw.

// AUDIO CONTEXT
window.AudioContext = window.AudioContext || window.webkitAudioContext ;

if (!AudioContext) alert('This site cannot be run in your Browser. Try a recent Chrome or Firefox. ');

var audioContext = new AudioContext();
var currentBuffer  = null;

// CANVAS
var canvasWidth = 512,  canvasHeight = 120 ;
var newCanvas   = createCanvas (canvasWidth, canvasHeight);
var context     = null;

window.onload = appendCanvas;
function appendCanvas() { document.body.appendChild(newCanvas);
                          context = newCanvas.getContext('2d'); }

// MUSIC LOADER + DECODE
function loadMusic(url) {   
    var req = new XMLHttpRequest();
    req.open( "GET", url, true );
    req.responseType = "arraybuffer";    
    req.onreadystatechange = function (e) {
          if (req.readyState == 4) {
             if(req.status == 200)
                  audioContext.decodeAudioData(req.response, 
                    function(buffer) {
                             currentBuffer = buffer;
                             displayBuffer(buffer);
                    }, onDecodeError);
             else
                  alert('error during the load.Wrong url or cross origin issue');
          }
    } ;
    req.send();
}

function onDecodeError() {  alert('error while decoding your file.');  }

// MUSIC DISPLAY
function displayBuffer(buff /* is an AudioBuffer */) {
   var leftChannel = buff.getChannelData(0); // Float32Array describing left channel     
   var lineOpacity = canvasWidth / leftChannel.length  ;      
   context.save();
   context.fillStyle = '#222' ;
   context.fillRect(0,0,canvasWidth,canvasHeight );
   context.strokeStyle = '#121';
   context.globalCompositeOperation = 'lighter';
   context.translate(0,canvasHeight / 2);
   context.globalAlpha = 0.06 ; // lineOpacity ;
   for (var i=0; i<  leftChannel.length; i++) {
       // on which line do we get ?
       var x = Math.floor ( canvasWidth * i / leftChannel.length ) ;
       var y = leftChannel[i] * canvasHeight / 2 ;
       context.beginPath();
       context.moveTo( x  , 0 );
       context.lineTo( x+1, y );
       context.stroke();
   }
   context.restore();
   console.log('done');
}

function createCanvas ( w, h ) {
    var newCanvas = document.createElement('canvas');
    newCanvas.width  = w;     newCanvas.height = h;
    return newCanvas;
};


loadMusic('could_be_better.mp3');

Edit : The issue here is that we have too much data to draw. Take a 3 minutes mp3, you'll have 3*60*44100 = about 8.000.000 line to draw. On a display that has, say, 1024 px resolution, that makes 8.000 lines per pixel...
In the code above, the canvas is doing the 'resampling', by drawing lines with low-opacity and in 'ligther' composition mode (e.g. pixel's r,g,b will add-up).
To speed-up things, you have to re-sample by yourself, but to get some colors, it's not just a down-sampling, you'll have to handle a set (within a performance array most probably) of 'buckets', one for each horizontal pixel (so, say 1024), and in every bucket you compute the cumulated sound pressure, the variance, min, max and then, at display time, you decide how you will render that with colors.
For instance :
values between 0 positiveMin are very clear. (any sample is below that point).
values between positiveMin and positiveAverage - variance are darker,
values between positiveAverage - variance and positiveAverage + variance are darker,
and values between positiveAverage+variance and positiveMax lighter .
(same for negative values) That makes 5 colors for each bucket, and it's still quite some work, for you to code and for the browser to compute.
I don't know if the performance could get decent with this, but i fear the statistics accuracy and the color coding of the software you mention can't be reached on a browser (obviously not in real-time), and that you'll have to make some compromises.

Edit 2 :
I tried to get some colors out of stats but it quite failed. My guess, now, is that the guys at tracktor also change color depending on frequency.... quite some work here....

Anyway, just for the record, the code for an average / mean variation follows.
(variance was too low, i had to use mean variation).

enter image description here

// MUSIC DISPLAY
function displayBuffer2(buff /* is an AudioBuffer */) {
   var leftChannel = buff.getChannelData(0); // Float32Array describing left channel       
   // we 'resample' with cumul, count, variance
   // Offset 0 : PositiveCumul  1: PositiveCount  2: PositiveVariance
   //        3 : NegativeCumul  4: NegativeCount  5: NegativeVariance
   // that makes 6 data per bucket
   var resampled = new Float64Array(canvasWidth * 6 );
   var i=0, j=0, buckIndex = 0;
   var min=1e3, max=-1e3;
   var thisValue=0, res=0;
   var sampleCount = leftChannel.length;
   // first pass for mean
   for (i=0; i<sampleCount; i++) {
        // in which bucket do we fall ?
        buckIndex = 0 | ( canvasWidth * i / sampleCount );
        buckIndex *= 6;
        // positive or negative ?
        thisValue = leftChannel[i];
        if (thisValue>0) {
            resampled[buckIndex    ] += thisValue;
            resampled[buckIndex + 1] +=1;               
        } else if (thisValue<0) {
            resampled[buckIndex + 3] += thisValue;
            resampled[buckIndex + 4] +=1;                           
        }
        if (thisValue<min) min=thisValue;
        if (thisValue>max) max = thisValue;
   }
   // compute mean now
   for (i=0, j=0; i<canvasWidth; i++, j+=6) {
       if (resampled[j+1] != 0) {
             resampled[j] /= resampled[j+1]; ;
       }
       if (resampled[j+4]!= 0) {
             resampled[j+3] /= resampled[j+4];
       }
   }
   // second pass for mean variation  ( variance is too low)
   for (i=0; i<leftChannel.length; i++) {
        // in which bucket do we fall ?
        buckIndex = 0 | (canvasWidth * i / leftChannel.length );
        buckIndex *= 6;
        // positive or negative ?
        thisValue = leftChannel[i];
        if (thisValue>0) {
            resampled[buckIndex + 2] += Math.abs( resampled[buckIndex] - thisValue );               
        } else  if (thisValue<0) {
            resampled[buckIndex + 5] += Math.abs( resampled[buckIndex + 3] - thisValue );                           
        }
   }
   // compute mean variation/variance now
   for (i=0, j=0; i<canvasWidth; i++, j+=6) {
        if (resampled[j+1]) resampled[j+2] /= resampled[j+1];
        if (resampled[j+4]) resampled[j+5] /= resampled[j+4];   
   }
   context.save();
   context.fillStyle = '#000' ;
   context.fillRect(0,0,canvasWidth,canvasHeight );
   context.translate(0.5,canvasHeight / 2);   
  context.scale(1, 200);

   for (var i=0; i< canvasWidth; i++) {
        j=i*6;
       // draw from positiveAvg - variance to negativeAvg - variance 
       context.strokeStyle = '#F00';
       context.beginPath();
       context.moveTo( i  , (resampled[j] - resampled[j+2] ));
       context.lineTo( i  , (resampled[j +3] + resampled[j+5] ) );
       context.stroke();
       // draw from positiveAvg - variance to positiveAvg + variance 
       context.strokeStyle = '#FFF';
       context.beginPath();
       context.moveTo( i  , (resampled[j] - resampled[j+2] ));
       context.lineTo( i  , (resampled[j] + resampled[j+2] ) );
       context.stroke();
       // draw from negativeAvg + variance to negativeAvg - variance 
       // context.strokeStyle = '#FFF';
       context.beginPath();
       context.moveTo( i  , (resampled[j+3] + resampled[j+5] ));
       context.lineTo( i  , (resampled[j+3] - resampled[j+5] ) );
       context.stroke();
   }
   context.restore();
   console.log('done 231 iyi');
}

Question 2

Realtime moving Waveform

I'm currently playing with Web Audio API and made a spectrum using canvas.

function animate(){
 var a=new Uint8Array(analyser.frequencyBinCount),
     y=new Uint8Array(analyser.frequencyBinCount),b,c,d;
 analyser.getByteTimeDomainData(y);
 analyser.getByteFrequencyData(a);
 b=c=a.length;
 d=w/c;
 ctx.clearRect(0,0,w,h);
 while(b--){
  var bh=a[b]+1;
  ctx.fillStyle='hsla('+(b/c*240)+','+(y[b]/255*100|0)+'%,50%,1)';
  ctx.fillRect(1*b,h-bh,1,bh);
  ctx.fillRect(1*b,y[b],1,1);
 }
 animation=webkitRequestAnimationFrame(animate);
}

Mini question: is there a way to not write 2 times new Uint8Array(analyser.frequencyBinCount)?

DEMO

add a MP3/MP4 file and wait. (tested in Chrome)

http://jsfiddle.net/pc76H/2/

But there are many problems. I can't find a proper documentation of the various audio filters.

Also, if you look at the spectrum you will notice that after 70% or the range there is no data. What does that mean? that maybe from 16k hz to 20k hz is no sound? I would apply a text to the canvas to show the various HZ. but where??

I found out that the returned data is a power of 32 in length with a max of 2048 and the height is always 256.

BUT the real question is ... I want to create a moving waveform like in traktor.

I already did that some time ago with PHP it converts the file to low bitrate than extracts the data and coverts that to a image. i found the script somewhere...but I don't remember where... note: needs LAME

<?php
$a=$_GET["f"];
if(file_exists($a)){
    if(file_exists($a.".png")){
        header("Content-Type: image/png");
        echo file_get_contents($a.".png");
    }else{
        $b=3000;$c=300;define("d",3);
        ini_set("max_execution_time","30000");
        function n($g,$h){
            $g=hexdec(bin2hex($g));
            $h=hexdec(bin2hex($h));
            return($g+($h*256));
        };
        $k=substr(md5(time()),0,10);
        copy(realpath($a),"/var/www/".$k."_o.mp3");
        exec("lame /var/www/{$k}_o.mp3 -f -m m -b 16 --resample 8 /var/www/{$k}.mp3 && lame --decode /var/www/{$k}.mp3 /var/www/{$k}.wav");
        //system("lame {$k}_o.mp3 -f -m m -b 16 --resample 8 {$k}.mp3 && lame --decode {$k}.mp3 {$k}.wav");
        @unlink("/var/www/{$k}_o.mp3");
        @unlink("/var/www/{$k}.mp3");
        $l="/var/www/{$k}.wav";
        $m=fopen($l,"r");
        $n[]=fread($m,4);
        $n[]=bin2hex(fread($m,4));
        $n[]=fread($m,4);
        $n[]=fread($m,4);
        $n[]=bin2hex(fread($m,4));
        $n[]=bin2hex(fread($m,2));
        $n[]=bin2hex(fread($m,2));
        $n[]=bin2hex(fread($m,4));
        $n[]=bin2hex(fread($m,4));
        $n[]=bin2hex(fread($m,2));
        $n[]=bin2hex(fread($m,2));
        $n[]=fread($m,4);
        $n[]=bin2hex(fread($m,4));
        $o=hexdec(substr($n[10],0,2));
        $p=$o/8;
        $q=hexdec(substr($n[6],0,2));
        if($q==2){$r=40;}else{$r=80;};
        while(!feof($m)){
            $t=array();
            for($i=0;$i<$p;$i++){
                $t[$i]=fgetc($m);
            };
            switch($p){
                case 1:$s[]=n($t[0],$t[1]);break;
                case 2:if(ord($t[1])&128){$u=0;}else{$u=128;};$u=chr((ord($t[1])&127)+$u);$s[]= floor(n($t[0],$u)/256);break;
            };
            fread($m,$r);
        };
        fclose($m);
        unlink("/var/www/{$k}.wav");
        $x=imagecreatetruecolor(sizeof($s)/d,$c);
        imagealphablending($x,false);
        imagesavealpha($x,true);
        $y=imagecolorallocatealpha($x,255,255,255,127);
        imagefilledrectangle($x,0,0,sizeof($s)/d,$c,$y);
        for($d=0;$d<sizeof($s);$d+=d){
            $v=(int)($s[$d]/255*$c);
            imageline($x,$d/d,0+($c-$v),$d/d,$c-($c-$v),imagecolorallocate($x,255,0,255));
        };
        $z=imagecreatetruecolor($b,$c);
        imagealphablending($z,false);
        imagesavealpha($z,true);
        imagefilledrectangle($z,0,0,$b,$c,$y);
        imagecopyresampled($z,$x,0,0,0,0,$b,$c,sizeof($s)/d,$c);
        imagepng($z,realpath($a).".png");
        header("Content-Type: image/png");
        imagepng($z);
        imagedestroy($z);
    };
}else{
    echo $a;
};

?>

The script works... but you are limited to a max image size of 4k pixels.

so you have not a nice waveform if it should rappresent only some milliseconds.

What do i need to store/create a realtime waveform like the traktors app or this php script? btw the traktor has also a colored waveform(the php script not).

EDIT

I rewrote your script that it fits my idea... it's relatively fast.

As you can see inside the function createArray i push the various lines into an object with the key as x coordinate.

I'm simply taking the the highest number.

here is where we could play with the colors.

var ajaxB,AC,B,LC,op,x,y,ARRAY={},W=1024,H=256;
var aMax=Math.max.apply.bind(Math.max, Math);
function error(a){
 console.log(a);
};
function createDrawing(){
 console.log('drawingArray');
 var C=document.createElement('canvas');
 C.width=W;
 C.height=H;
 document.body.appendChild(C);
 var context=C.getContext('2d');
 context.save();
 context.strokeStyle='#121';
 context.globalCompositeOperation='lighter';
 L2=W*1;
 while(L2--){
  context.beginPath();
  context.moveTo(L2,0);
  context.lineTo(L2+1,ARRAY[L2]);
  context.stroke();
 }
 context.restore();
};
function createArray(a){
 console.log('creatingArray');
 B=a;
 LC=B.getChannelData(0);// Float32Array describing left channel
 L=LC.length;  
 op=W/L;
 for(var i=0;i<L;i++){
  x=W*i/L|0;
  y=LC[i]*H/2;
  if(ARRAY[x]){
   ARRAY[x].push(y)
  }else{
   !ARRAY[x-1]||(ARRAY[x-1]=aMax(ARRAY[x-1]));
   // the above line contains an array of values
   // which could be converted to a color 
   // or just simply create a gradient 
   // based on avg max min (frequency???) whatever
   ARRAY[x]=[y]
  }
 };
 createDrawing();
};
function decode(){
 console.log('decodingMusic');
 AC=new webkitAudioContext
 AC.decodeAudioData(this.response,createArray,error);
};
function loadMusic(url){
 console.log('loadingMusic');   
 ajaxB=new XMLHttpRequest;
 ajaxB.open('GET',url);
 ajaxB.responseType='arraybuffer';    
 ajaxB.onload=decode;
 ajaxB.send();
}
loadMusic('AudioOrVideo.mp4');

Create a waveform of the full track with Web Audio API

Answer

Related questions