[fix] handle single closing element in HTMLTextExtractor

This commit is contained in:
Adam Tauber 2015-01-22 17:43:45 +01:00
parent 06186e72a9
commit 699fe60e50
1 changed file with 4 additions and 0 deletions

View File

@@ -82,8 +82,12 @@ class HTMLTextExtractor(HTMLParser):
self.tags.append(tag) self.tags.append(tag)
def handle_endtag(self, tag):
    """Close the most recently opened tag tracked in ``self.tags``.

    ``self.tags`` is used as a stack of currently open tag names.

    Args:
        tag: Name of the closing tag reported by the parser.

    Raises:
        Exception: With the message ``"invalid html"`` when ``tag`` does
            not match the tag on top of the stack.
    """
    # A lone closing element with nothing open (e.g. a stray "</p>") is
    # ignored; indexing self.tags[-1] on an empty stack would raise
    # IndexError instead.
    if not self.tags:
        return

    # Closing tags must arrive in the reverse order of the opening tags.
    if tag != self.tags[-1]:
        raise Exception("invalid html")

    self.tags.pop()
def is_valid_tag(self): def is_valid_tag(self):