[fix] html escape

This commit is contained in:
asciimoo 2013-11-18 16:47:20 +01:00
parent ad7c83e3f6
commit d0427d9bae

View File

@ -1,5 +1,5 @@
from HTMLParser import HTMLParser
import htmlentitydefs
#import htmlentitydefs
import csv
import codecs
import cStringIO
@ -17,8 +17,9 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(unichr(codepoint))
def handle_entityref(self, name):
# Handler for a named entity reference; appends text for it to self.result.
# NOTE(review): presumably `name` arrives without the surrounding '&' and ';'
# (e.g. 'amp' for '&amp;') -- confirm against the HTMLParser documentation.
# NOTE(review): the hunk header above (-17,8 +17,9) implies the next two lines
# are the removed side of this diff and the three lines after them are the
# added side; they are not meant to execute together.
codepoint = htmlentitydefs.name2codepoint[name]
self.result.append(unichr(codepoint))
#codepoint = htmlentitydefs.name2codepoint[name]
#self.result.append(unichr(codepoint))
# Appends `name` exactly as received, with no lookup or decoding.
# NOTE(review): if `name` is the bare entity name, the extracted text will
# contain e.g. 'amp' rather than '&' or '&amp;' -- confirm this is intended.
self.result.append(name)
def get_text(self):
    """Return the collected fragments in self.result joined into one unicode string."""
    fragments = self.result
    return u''.join(fragments)