|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler
public class BoilerpipeHTMLContentHandler
A simple SAX ContentHandler, used by BoilerpipeSAXInput. Can
be used by different parser implementations, e.g. NekoHTML and TagSoup.
| Constructor Summary | |
|---|---|
BoilerpipeHTMLContentHandler()
Constructs a BoilerpipeHTMLContentHandler using the
DefaultTagActionMap. |
|
BoilerpipeHTMLContentHandler(TagActionMap tagActions)
Constructs a BoilerpipeHTMLContentHandler using the given
TagActionMap. |
|
| Method Summary | |
|---|---|
protected void |
addTextBlock(TextBlock tb)
|
void |
addWhitespaceIfNecessary()
|
void |
characters(char[] ch,
int start,
int length)
|
void |
endDocument()
|
void |
endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName)
|
void |
endPrefixMapping(java.lang.String prefix)
|
java.lang.String |
getTitle()
|
void |
ignorableWhitespace(char[] ch,
int start,
int length)
|
void |
processingInstruction(java.lang.String target,
java.lang.String data)
|
void |
recycle()
Recycles this instance. |
void |
setDocumentLocator(org.xml.sax.Locator locator)
|
void |
setTitle(java.lang.String s)
|
void |
skippedEntity(java.lang.String name)
|
void |
startDocument()
|
void |
startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts)
|
void |
startPrefixMapping(java.lang.String prefix,
java.lang.String uri)
|
TextDocument |
toTextDocument()
Returns a TextDocument containing the extracted
TextBlocks. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public BoilerpipeHTMLContentHandler()
Constructs a BoilerpipeHTMLContentHandler using the
DefaultTagActionMap.
public BoilerpipeHTMLContentHandler(TagActionMap tagActions)
Constructs a BoilerpipeHTMLContentHandler using the given
TagActionMap.
Parameters: tagActions - The TagActionMap to use, e.g.
DefaultTagActionMap.
| Method Detail |
|---|
public void recycle()
Recycles this instance.
public void endDocument()
throws org.xml.sax.SAXException
Specified by: endDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void endPrefixMapping(java.lang.String prefix)
throws org.xml.sax.SAXException
Specified by: endPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void ignorableWhitespace(char[] ch,
int start,
int length)
throws org.xml.sax.SAXException
Specified by: ignorableWhitespace in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void processingInstruction(java.lang.String target,
java.lang.String data)
throws org.xml.sax.SAXException
Specified by: processingInstruction in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void setDocumentLocator(org.xml.sax.Locator locator)
Specified by: setDocumentLocator in interface org.xml.sax.ContentHandler
public void skippedEntity(java.lang.String name)
throws org.xml.sax.SAXException
Specified by: skippedEntity in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void startDocument()
throws org.xml.sax.SAXException
Specified by: startDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void startPrefixMapping(java.lang.String prefix,
java.lang.String uri)
throws org.xml.sax.SAXException
Specified by: startPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts)
throws org.xml.sax.SAXException
Specified by: startElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName)
throws org.xml.sax.SAXException
Specified by: endElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
public void characters(char[] ch,
int start,
int length)
throws org.xml.sax.SAXException
Specified by: characters in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException
protected void addTextBlock(TextBlock tb)
public java.lang.String getTitle()
public void setTitle(java.lang.String s)
public TextDocument toTextDocument()
Returns a TextDocument containing the extracted TextBlocks.
NOTE: Only call this after parsing.
Returns: The TextDocument
public void addWhitespaceIfNecessary()
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||