I have a string containing binary data in JS. Now I want to read, for example, an integer from it. So I get the first 4 characters, use charCodeAt, do some shifting etc. to get an integer.

Problem is that strings in JS are UTF-16 (instead of ASCII) and charCodeAt often returns values higher than 255.

The Mozilla reference states that "The first 128 Unicode code points are a direct match of the ASCII character encoding." (What about byte values above 127?)

How can I convert the result of charCodeAt to an ASCII value? Or is there a better way to convert a string of four characters to a 4 byte integer?

A: 

I'm going to assume for a second that your objective is to read arbitrary bytes from a string. My first suggestion would be to use a hexadecimal representation of the binary data.

You can read the values using conversions to numbers from hex:

var BITS_PER_BYTE = 8;
var BITS_PER_CHAR = 4;  // one hex character encodes 4 bits

function readBytes(hexString, numBytes) {
  // parseInt already returns a number, so no extra conversion is needed
  return parseInt( hexString.substr(0, numBytes * (BITS_PER_BYTE / BITS_PER_CHAR)), 16 );
}

function removeBytes(hexString, numBytes) {
  return hexString.substr( numBytes * (BITS_PER_BYTE / BITS_PER_CHAR) );
}

The functions can then be used to read whatever you want:

var hex = '4ef2c3382fd0';  // even number of hex digits, i.e. whole bytes
alert( 'We had: ' + hex );

var intVal = readBytes(hex,2);
alert( 'Two bytes: ' + intVal.toString(2) );

hex = removeBytes(hex,2);
alert( 'Now we have: ' + hex );

You can then interpret the byte string however you want.
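In case the data starts out as a raw character string, here's a minimal sketch of how such a hex string might be produced in the first place (toHexString is a hypothetical helper; it assumes every char code already fits in one byte):

// Hypothetical helper: encode a "binary" string as hex,
// assuming each char code fits in a single byte (0-255).
function toHexString(binString) {
  var hex = '';
  for (var i = 0; i < binString.length; i++) {
    var b = binString.charCodeAt(i) & 0xFF;          // mask to one byte
    hex += (b < 16 ? '0' : '') + b.toString(16);     // zero-pad to two digits
  }
  return hex;
}

toHexString('\x4e\xf2\xc3');  // "4ef2c3"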

Hope this helps! Cheers!

coderjoe
+5  A: 

I believe that you can do this with relatively simple bit operations:

function stringToBytes ( str ) {
  var ch, st, re = [];
  for (var i = 0; i < str.length; i++ ) {
    ch = str.charCodeAt(i);  // get char 
    st = [];                 // set up "stack"
    do {
      st.push( ch & 0xFF );  // push byte to stack
      ch = ch >> 8;          // shift value down by 1 byte
    }  
    while ( ch );
    // add stack contents to result
    // done because chars have "wrong" endianness
    re = re.concat( st.reverse() );
  }
  // return an array of bytes
  return re;
}

stringToBytes( "A\u1242B\u4123C" );  // [65, 18, 66, 66, 65, 35, 67]

It should then be a simple matter to read the byte array as if it were memory, assembling the bytes into larger numbers:

function getIntAt ( arr, offs ) {
  return (arr[offs+0] << 24) +
         (arr[offs+1] << 16) +
         (arr[offs+2] << 8) +
          arr[offs+3];
}

function getWordAt ( arr, offs ) {
  return (arr[offs+0] << 8) +
          arr[offs+1];
}

'\\u' + getWordAt( stringToBytes( "A\u1242" ), 1 ).toString(16);  // "\u1242"
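Putting the pieces together for the original question, a minimal sketch of reading a 4-byte integer from a string (the input here is made up for illustration):

// Read a big-endian 4-byte integer from the start of a string,
// assuming each character contributes exactly one byte.
var bytes = stringToBytes( "\x12\x34\x56\x78" );
getIntAt( bytes, 0 ).toString(16);  // "12345678"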
Borgar
+2  A: 

How did you get the binary data into the string in the first place? How the binary data gets encoded into a string is an IMPORTANT consideration, and you need an answer to that question before you can proceed.

One way I know of to get binary data into a string is to use the XHR object and set it to expect UTF-16.

Once it's in UTF-16, you can retrieve 16-bit numbers from the string using "....".charCodeAt(0), which will return a number between 0 and 65535.

Then, if you like, you can convert that number into two numbers between 0 and 255 like this:

var leftByte = mynumber >>> 8;   // high byte
var rightByte = mynumber & 255;  // low byte
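For example, with a hypothetical value pulled from such a string:

var mynumber = "\u4142".charCodeAt(0);  // 0x4142 (16706)
var leftByte = mynumber >>> 8;          // 0x41 (65)
var rightByte = mynumber & 255;         // 0x42 (66)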
Breton
+4  A: 

Borgar's answer seems correct.

Just wanted to clarify one point. Javascript evaluates bitwise operations on 32-bit signed ints, where the highest (left-most) bit is the sign bit. I.e.:

getIntAt([0x7f,0,0,0],0).toString(16)  //  "7f000000"

getIntAt([0x80,0,0,0],0).toString(16)  // "-80000000"

However, for octet-data processing (e.g., a network stream), you usually want the unsigned int representation. This can be accomplished by appending a '>>> 0' (zero-fill right shift) operator, which coerces the result to an unsigned 32-bit integer.

function getUIntAt ( arr, offs ) {
  // '+' binds tighter than '>>>', so the entire sum is shifted
  // (by zero bits), coercing it to an unsigned 32-bit value.
  return (arr[offs+0] << 24) +
         (arr[offs+1] << 16) +
         (arr[offs+2] << 8) +
          arr[offs+3] >>> 0;
}

getUIntAt([0x80,0,0,0],0).toString(16)   // "80000000"
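Side by side with the signed getIntAt from Borgar's answer, the difference is only in how the same 32 bits are interpreted:

getIntAt( [0x80,0,0,0], 0 )   // -2147483648
getUIntAt( [0x80,0,0,0], 0 )  //  2147483648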
Moos