I am processing xhtml using javascript. I am getting the text content for a div node by concatenating the nodeValue of all child nodes where nodeType == Node.TEXT_NODE.
The resulting string sometimes contains a non-breaking space entity. How do I replace this with a regular space character?
My div looks like this...
<div><b>Expires On</b> Sep 30, 2009 06:30&nbsp;AM</div>
The following suggestions found on the web did not work:
// Attempt 1: the pattern is anchored at both ends, so it only matches when every
// \xa0 sits at the very start or very end of the string. On a match the whole
// string is replaced with "" (the captured middle group is never used);
// otherwise the text comes back unchanged.
var cleanText = text.replace(/^\xa0*([^\xa0]*)\xa0*$/g,"");
// Attempt 2: decode named HTML entities with a helper. Note that this call is
// spelled replaceHtmlEntities, while the helper defined below is declared as
// replaceHtmlEntites.
var cleanText = replaceHtmlEntities(text);
/**
 * Decodes the named entities &nbsp;, &amp;, &quot;, &lt; and &gt; in a string.
 * The lookup table and pattern are created once and kept private in a closure.
 * Any other "&...;" sequence is left exactly as it was.
 */
var replaceHtmlEntites = (function() {
  // Entity name (without "&" and ";") -> replacement character.
  var entityChars = {
    "nbsp": " ",
    "amp" : "&",
    "quot": "\"",
    "lt" : "<",
    "gt" : ">"
  };
  // Only the names listed in the table can match, so every lookup succeeds.
  var entityPattern = /&(nbsp|amp|quot|lt|gt);/g;

  // Replacement callback: receives the full match and the captured entity name.
  function decodeEntity(wholeMatch, entityName) {
    return entityChars[entityName];
  }

  return function(s) {
    return s.replace(entityPattern, decodeEntity);
  };
})();
Any suggestions?
This is much easier than you're making it. The text node will not have the literal string "&nbsp;"
in it; it will have the corresponding character with code 160.
/**
 * Returns a copy of `str` in which every non-breaking space (the character
 * with code 160) has been swapped for an ordinary space.
 *
 * @param {string} str - text to clean
 * @returns {string} the text with regular spaces in place of non-breaking ones
 */
function replaceNbsps(str) {
  var nbsp = String.fromCharCode(160);
  // Cut the string at each non-breaking space, then glue the pieces back
  // together with a plain space between them.
  var pieces = str.split(nbsp);
  return pieces.join(" ");
}
// Write the cleaned-up text back onto the text node.
textNode.nodeValue = replaceNbsps(textNode.nodeValue);
UPDATE
Even easier:
// \u00a0 is the non-breaking space (hex A0 = decimal 160); the g flag makes the
// replacement apply to every occurrence, not just the first.
textNode.nodeValue = textNode.nodeValue.replace(/\u00a0/g, " ");