Issue description:
- I need to fix an issue with resolving of standard HTML entitities.
- I've implemented HtmlEntityReader - implementation of XmlReader which has a code to resolve entities
- Public API of our system provides a methods with usage of XmlReader, so user can pass XmlReader created using one of the XmlReader.Create methods
Current code of my xml unit tests is below:
using System.Xml;
using NUnit.Framework;
namespace Tests
{
[TestFixture]
public class XmlTests
{
// this test works
[Test]
public void TestEntitiesResolving1()
{
var path = QA.ResolvePath(@"html\bugs\317.html");
using (var reader = new XmlTextReader(path, new NameTable()))
{
reader.XmlResolver = null; //to prevent DTD downloading
var wrapper = new HtmlEntityReader(reader, XmlUtils.HtmlEntities);
while (wrapper.Read()) { }
}
}
// this test does not work - why?
// what's the difference in initialization of internal XmlTextReaderImpl?
[Test]
public void TestEntitiesResolving2()
{
var path = QA.ResolvePath(@"html\bugs\317.html");
var settings = new XmlReaderSettings
{
XmlResolver = null, //to prevent DTD downloading
NameTable = new NameTable(),
ProhibitDtd = false,
CheckCharacters = false,
};
using (var reader = XmlReader.Create(path, settings))
{
var wrapper = new HtmlEntityReader(reader, XmlUtils.HtmlEntities);
while (wrapper.Read()) { }
}
}
}
}
Partial code of HtmlEntityReader is below:
internal sealed class HtmlEntityReader : XmlReader
{
readonly XmlReader _impl;
readonly Hashtable _entitySet;
string _entityValue;
public HtmlEntityReader(XmlReader reader, Hashtable entitySet)
{
if (reader == null) throw new ArgumentNullException("reader");
if (entitySet == null) throw new ArgumentNullException("entitySet");
_impl = reader;
_entitySet = entitySet;
}
public override XmlNodeType NodeType
{
get { return _entityValue != null ? XmlNodeType.Text : _impl.NodeType; }
}
public override string LocalName
{
get { return _entityValue != null ? string.Empty : _impl.LocalName; }
}
public override string Prefix
{
get { return _entityValue != null ? string.Empty : _impl.Prefix; }
}
public override string Name
{
get { return _entityValue != null ? string.Empty : _impl.Name; }
}
public override bool HasValue
{
get { return _entityValue != null || _impl.HasValue; }
}
public override string Value
{
get { return _entityValue ?? _impl.Value; }
}
public override bool CanResolveEntity
{
get { return true; }
}
public override void ResolveEntity()
{
//it seems this does not call - why?
}
public override bool Read()
{
_entityValue = null;
if (!_impl.Read()) return false;
if (NodeType == XmlNodeType.EntityReference)
{
//resolving of entity reference
_entityValue = (string)_entitySet[Name];
}
return true;
}
// ... delegation of XmlReader abstract methods to _impl
}
I've got the exception:
System.Xml.XmlException: Reference to undeclared entity 'nbsp'. Line 4, position 5. at System.Xml.XmlTextReaderImpl.Throw(Exception e) at System.Xml.XmlTextReaderImpl.Throw(String res, String arg, Int32 lineNo, Int32 linePos) at System.Xml.XmlTextReaderImpl.HandleGeneralEntityReference(String name, Boolean isInAttributeValue, Boolean pushFakeEntityIfNullResolver, Int32 entityStartLinePos) at System.Xml.XmlTextReaderImpl.HandleEntityReference(Boolean isInAttributeValue, EntityExpandType expandType, ref Int32 charRefEndPos) at System.Xml.XmlTextReaderImpl.ParseText(ref Int32 startPos, ref Int32 endPos, ref Int32 outOrChars) at System.Xml.XmlTextReaderImpl.ParseText() at System.Xml.XmlTextReaderImpl.ParseElementContent() at System.Xml.XmlTextReaderImpl.Read() ... private staff
Could you provide a quick advice or link to a solution while I am fixing / investigating / searching this issue through my own efforts?