views:

88

answers:

3

You wrote this code:

/**
 * Reduces a string to a lower-case, accent-free key made only of word
 * characters (letters, digits and underscore).
 *
 * Steps, in order: lower-case the input, drop all whitespace, replace the
 * listed accented letters and ligatures with plain ASCII equivalents, then
 * drop every remaining non-word character.
 *
 * Declared with `var` so the assignment does not create an implicit global
 * (which throws a ReferenceError in strict mode and in ES modules).
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The normalized, lower-case text.
 */
var accentsTidy = function (s) {
    return s.toLowerCase()
        .replace(/\s/g, "")
        .replace(/[àáâãäå]/g, "a")
        .replace(/æ/g, "ae")
        .replace(/ç/g, "c")
        .replace(/[èéêë]/g, "e")
        .replace(/[ìíîï]/g, "i")
        .replace(/ñ/g, "n")
        .replace(/[òóôõö]/g, "o")
        .replace(/œ/g, "oe")
        .replace(/[ùúûü]/g, "u")
        .replace(/[ýÿ]/g, "y")
        // Anything still outside [A-Za-z0-9_] (punctuation, unmapped letters) is removed.
        .replace(/\W/g, "");
};

Could you please answer my question? If I also want to handle upper-case characters (for example: Á É -> A E, etc.), how must I change the code? Thank you very much!

+1  A: 

This should work:

/**
 * Builds a cleaned copy of a string by passing each UTF-16 code unit
 * through replaceCharacter and concatenating the results.
 *
 * @param {string} value - Text to clean.
 * @returns {string} The concatenation of every per-character replacement.
 */
function purifyString(value) {
    return value
        .split("")
        .map(function (unit) {
            return replaceCharacter(unit);
        })
        .join("");
}

/**
 * Maps a single character to a plain-ASCII replacement.
 *
 * Characters with an explicit mapping (typographic punctuation, accented
 * Latin letters, a few symbols) return that mapping. Printable ASCII
 * characters (space through "~") are returned unchanged. Every other
 * character - unmapped control characters and unmapped non-ASCII characters -
 * is removed by returning an empty string.
 *
 * @param {string} character - A one-character string.
 * @returns {string} The replacement text; may be empty or longer than one character.
 */
function replaceCharacter(character) {
    switch (character) {
        // Whitespace controls that are kept or normalized to a line break.
        case '\r': return "\r";
        case '\n': return "\n";
        case '\t': return "\t";
        case '\f':
        case '\v': return "\r\n";

        // Quotes, dashes and other typographic punctuation.
        case '`':
        case '‘':
        case '’':
        case '´': return "'";
        case '„':
        case '“':
        case '”': return "\"";
        case '‚':
        case '¸': return ",";
        case '•':
        case '–':
        case '—':
        case '¬':
        case '¯': return "-";
        case '…': return "...";
        case '‹': return "<";
        case '›': return ">";
        case 'ˆ': return "^";
        case '˜': return "~";
        case '¦': return "|";
        case '¡': return "i";
        case '¿': return "?";

        // Symbols.
        case '€':
        case '†':
        case '‡': return "_";
        case '‰': return "%";
        case '™': return "(tm)";
        case '©': return "(c)";
        case '®': return "(r)";
        case '×': return "x";
        case '÷': return "/";

        // Superscripts, ordinals and fractions.
        case '¹': return "1";
        case '²': return "2";
        case '³': return "3";
        case 'ª': return "a";
        case 'º': return "0";
        case '¼': return "1/4";
        case '½': return "1/2";
        case '¾': return "3/4";

        // Upper-case letters.
        case 'À':
        case 'Á':
        case 'Â':
        case 'Ã':
        case 'Ä':
        case 'Å': return "A";
        case 'Æ': return "AE";
        case 'Ç': return "C";
        case 'Ð': return "D";
        case 'È':
        case 'É':
        case 'Ê':
        case 'Ë': return "E";
        case 'Ì':
        case 'Í':
        case 'Î':
        case 'Ï': return "I";
        case 'Ñ': return "N";
        case 'Ò':
        case 'Ó':
        case 'Ô':
        case 'Õ':
        case 'Ö':
        case 'Ø': return "O";
        // The O-E ligature expands to "OE" (it was previously mapped to "CE").
        case 'Œ': return "OE";
        case 'Š': return "S";
        case 'Ù':
        case 'Ú':
        case 'Û':
        case 'Ü': return "U";
        case 'Ý':
        case 'Ÿ':
        case '¥': return "Y";
        case 'Ž': return "Z";

        // Lower-case letters.
        case 'à':
        case 'á':
        case 'â':
        case 'ã':
        case 'ä':
        case 'å': return "a";
        case 'æ': return "ae";
        case 'ç': return "c";
        case 'è':
        case 'é':
        case 'ê':
        case 'ë': return "e";
        case 'ƒ': return "f";
        case 'ì':
        case 'í':
        case 'î':
        case 'ï': return "i";
        case 'ñ': return "n";
        case 'ò':
        case 'ó':
        case 'ô':
        case 'õ':
        case 'ö':
        case 'ø': return "o";
        // The o-e ligature expands to "oe" (it was previously mapped to "ce").
        case 'œ': return "oe";
        case 'š': return "s";
        // Sharp s transliterates to "ss"; it only looks like a capital B.
        case 'ß': return "ss";
        case 'ù':
        case 'ú':
        case 'û':
        case 'ü': return "u";
        case 'ý':
        case 'ÿ': return "y";
        case 'ž': return "z";

        default:
            // Keep ordinary printable ASCII as-is. Returning "" here for
            // everything would erase plain letters, digits and spaces too.
            if (character.length === 1 && character >= ' ' && character <= '~') {
                return character;
            }
            return "";
    }
}
ChaosPandion
woah! did you have that ready somewhere? when did you need to use this before?
Here Be Wolves
@jrharshath - This has been lying around for a while. We use it to clean out the cruft from people copying over Microsoft Word documents.
ChaosPandion
...but you should replace Œ with Oe, not CE. And perhaps replace € with EUR instead of _, Ä/ä with Ae/ae, Ö/ö with Oe/oe, Ü/ü with Ue/ue and ß with ss. There may be other characters that can be "normalized" better, too.
Tim Pietzcker
lame answer dude...
Kasturi
@Kasturi - I bet you have a lot of friends.
ChaosPandion
A: 

Hello,

I don't know how to type those chars on my keyboard, but the following should do the job.


/**
 * Replaces upper-case accented letters and ligatures with their upper-case
 * ASCII equivalents, leaving every other character untouched.
 *
 * The input must NOT be lower-cased first: doing so removes every upper-case
 * character before the upper-case patterns run, so nothing would ever match.
 *
 * Declared with `var` so the assignment does not create an implicit global
 * (which throws a ReferenceError in strict mode and in ES modules).
 *
 * @param {string} s - Text to process.
 * @returns {string} The text with upper-case accented letters replaced.
 */
var accentsTidyUpperCase = function (s) {
    return s
        .replace(/[ÀÁÂÃÄÅ]/g, "A")
        .replace(/Æ/g, "AE")
        .replace(/Ç/g, "C")
        .replace(/[ÈÉÊË]/g, "E")
        .replace(/[ÌÍÎÏ]/g, "I")
        .replace(/Ñ/g, "N")
        .replace(/[ÒÓÔÕÖ]/g, "O")
        .replace(/Œ/g, "OE")
        .replace(/[ÙÚÛÜ]/g, "U")
        .replace(/[ÝŸ]/g, "Y");
};

When you call both functions (accentsTidy and accentsTidyUpperCase) you will get lower case chars replaced by lowercase chars and upper case ones by upper case...

hacksteak25
+2  A: 

You could do this:

/**
 * Strips whitespace, accents and non-word characters from a string while
 * preserving the case of each replaced letter.
 *
 * Each accent pattern is matched case-insensitively; when the matched
 * character is upper-case, the upper-cased replacement is used (for example
 * "É" becomes "E" and "Æ" becomes "AE").
 *
 * Declared with `var` so the assignment does not create an implicit global
 * (which throws a ReferenceError in strict mode and in ES modules).
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The normalized text containing only word characters.
 */
var accentsTidy = function (s) {
    // Ordered [pattern, lower-case replacement] pairs; the order matters
    // because the final \W rule removes anything not yet converted to ASCII.
    var map = [
        [/\s/gi, ""],
        [/[àáâãäå]/gi, "a"],
        [/æ/gi, "ae"],
        [/ç/gi, "c"],
        [/[èéêë]/gi, "e"],
        [/[ìíîï]/gi, "i"],
        [/ñ/gi, "n"],
        [/[òóôõö]/gi, "o"],
        [/œ/gi, "oe"],
        [/[ùúûü]/gi, "u"],
        [/[ýÿ]/gi, "y"],
        [/\W/gi, ""]
    ];

    // Returns a replace() callback that mirrors the case of the matched text.
    var caseAware = function (replacement) {
        return function (match) {
            return match.toUpperCase() === match
                ? replacement.toUpperCase()
                : replacement;
        };
    };

    var result = s;
    for (var i = 0; i < map.length; ++i) {
        result = result.replace(map[i][0], caseAware(map[i][1]));
    }
    return result;
};

The significant changes:

  • regular expression patterns are case insensitive (see i modifier)
  • return upper-case variant of character if matched character is upper-case (match.toUpperCase() === match)
Gumbo
hmm, I think that's my favorite... but I am not the questioner :)
hacksteak25