tags:

views:

814

answers:

3

How can I replace diacritics (ă,ş,ţ etc) with their "normal" form (a,s,t) in javascript?

+3  A: 

If you want to do it entirely on the client side, I think your only option is with some kind of lookup table. Here's a starting point, written by a chap called Olavi Ivask on his blog...

/**
 * Replaces accented Latin-1 letters in a string with their unaccented
 * ASCII base letter (for example "é" becomes "e" and "Ñ" becomes "N").
 *
 * Only the Latin-1 Supplement letters listed in the table below are handled;
 * every other character is left unchanged.
 *
 * @param {string} s - Text to convert.
 * @returns {string} The converted text. When a global `document` exists the
 *     result is additionally written with `document.write`, so callers that
 *     rely on that output keep working.
 */
function replaceDiacritics(s)
{
    // Each regex sits at the same index as its replacement in `chars`.
    // No trailing comma: legacy engines count it as an extra (undefined)
    // element, which would break the loop below.
    var diacritics = [
        /[\xC0-\xC6]/g, /[\xE0-\xE6]/g,  // A, a
        /[\xC8-\xCB]/g, /[\xE8-\xEB]/g,  // E, e
        /[\xCC-\xCF]/g, /[\xEC-\xEF]/g,  // I, i
        // U+00D7 (multiplication sign) and U+00F7 (division sign) lie inside
        // the O/o ranges and are not letters, so they are skipped explicitly.
        /[\xD2-\xD6\xD8]/g, /[\xF2-\xF6\xF8]/g,  // O, o
        /[\xD9-\xDC]/g, /[\xF9-\xFC]/g,  // U, u
        /[\xD1]/g, /[\xF1]/g, // N, n
        /[\xC7]/g, /[\xE7]/g  // C, c
    ];

    var chars = ['A','a','E','e','I','i','O','o','U','u','N','n','C','c'];

    for (var i = 0; i < diacritics.length; i++)
    {
        s = s.replace(diacritics[i], chars[i]);
    }

    // Keep the original output side effect where a DOM is available.
    if (typeof document !== 'undefined')
    {
        document.write(s);
    }

    return s;
}

You can see this is simply an array of regexes for known diacritic chars, mapping them back onto a "plain" character.

Paul Dixon
+1  A: 

You would need a conversion map, something like this:

/**
 * Swaps each accented character listed in the lookup table for its plain
 * counterpart, replacing every occurrence in the input.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The text with the mapped characters replaced; characters
 *     not present in the table are left untouched.
 */
function removeAccents(str) {
 // Accented character -> plain replacement.
 var plainByAccented = {
  "ă" : "a",
  "ş" : "s",
  "ţ" : "t"
 };
 // Fold over the table, applying one global replacement per entry.
 return Object.keys(plainByAccented).reduce(function (text, accented) {
  return text.replace(new RegExp(accented, "g"), plainByAccented[accented]);
 }, str);
}

Or if you have access to iconv on your box, you could perhaps use some ajax calls to remove the accents with iconv's //TRANSLIT parameter.

Wabbitseason
A: 

A simple modification of Paul's script: extend the String prototype so the function can be called as a method on any string.

/**
 * Returns a copy of this string in which accented Latin-1 letters are
 * replaced by their unaccented ASCII base letter (for example "ë" -> "e").
 *
 * Only the Latin-1 Supplement letters listed in the table below are handled;
 * every other character is left unchanged.
 *
 * @returns {string} The converted text as a primitive string.
 */
String.prototype.removeDiacritics = function() {
    // Pairs of [pattern, replacement].
    // No trailing comma: legacy engines count it as an extra (undefined)
    // element, which would break the loop below.
    var diacritics = [
        [/[\xC0-\xC6]/g, 'A'],
        [/[\xE0-\xE6]/g, 'a'],
        [/[\xC8-\xCB]/g, 'E'],
        [/[\xE8-\xEB]/g, 'e'],
        [/[\xCC-\xCF]/g, 'I'],
        [/[\xEC-\xEF]/g, 'i'],
        // U+00D7 (multiplication sign) and U+00F7 (division sign) lie inside
        // the O/o ranges and are not letters, so they are skipped explicitly.
        [/[\xD2-\xD6\xD8]/g, 'O'],
        [/[\xF2-\xF6\xF8]/g, 'o'],
        [/[\xD9-\xDC]/g, 'U'],
        [/[\xF9-\xFC]/g, 'u'],
        [/[\xD1]/g, 'N'],
        [/[\xF1]/g, 'n'],
        [/[\xC7]/g, 'C'],
        [/[\xE7]/g, 'c']
    ];
    // In sloppy mode `this` is a String wrapper object; work on a primitive.
    var s = String(this);
    for (var i = 0; i < diacritics.length; i++) {
        s = s.replace(diacritics[i][0], diacritics[i][1]);
    }
    return s;
};

Now you can do:

// Usage example: call the method added to String.prototype on an accented string.
var wrongString = "hëllô";
alert(wrongString.removeDiacritics()); // alerts "hello"
blavla