protected class WebcrawlerConnector.ProcessActivityHTMLHandler extends WebcrawlerConnector.ProcessActivityLinkHandler implements IHTMLHandler
activities, contextDescription, documentIdentifier, filter, linkType
| Constructor and Description |
|---|
| `WebcrawlerConnector.ProcessActivityHTMLHandler(String documentIdentifier, IProcessActivity activities, WebcrawlerConnector.DocumentURLFilter filter)` Constructor. |
| Modifier and Type | Method and Description |
|---|---|
| `void` | `finishUp()` Done with the document. |
| `void` | `noteAHREF(String rawURL)` Note discovered href |
| `void` | `noteFormEnd()` Note the end of a form |
| `void` | `noteFormInput(Map inputAttributes)` Note an input tag |
| `void` | `noteFormStart(Map formAttributes)` Note the start of a form |
| `void` | `noteFRAMESRC(String rawURL)` Note discovered FRAME SRC |
| `void` | `noteIMGSRC(String rawURL)` Note discovered IMG SRC |
| `void` | `noteLINKHREF(String rawURL)` Note discovered href |
| `void` | `noteMetaTag(Map metaAttributes)` Note a meta tag |
| `void` | `noteTextCharacter(char textCharacter)` Note a character of text. |
| `boolean` | `shouldIndex()` Decide whether we should index. |
noteDiscoveredLink
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
noteDiscoveredLink
public WebcrawlerConnector.ProcessActivityHTMLHandler(String documentIdentifier, IProcessActivity activities, WebcrawlerConnector.DocumentURLFilter filter)
public boolean shouldIndex()
public void noteTextCharacter(char textCharacter) throws ManifoldCFException
noteTextCharacter
in interface IHTMLHandler
ManifoldCFException
public void noteMetaTag(Map metaAttributes) throws ManifoldCFException
noteMetaTag
in interface IMetaTagHandler
metaAttributes
- are the attributes that belong to the tag.
ManifoldCFException
public void noteFormStart(Map formAttributes) throws ManifoldCFException
noteFormStart
in interface IHTMLHandler
ManifoldCFException
public void noteFormInput(Map inputAttributes) throws ManifoldCFException
noteFormInput
in interface IHTMLHandler
ManifoldCFException
public void noteFormEnd() throws ManifoldCFException
noteFormEnd
in interface IHTMLHandler
ManifoldCFException
public void noteAHREF(String rawURL) throws ManifoldCFException
noteAHREF
in interface IHTMLHandler
ManifoldCFException
public void noteLINKHREF(String rawURL) throws ManifoldCFException
noteLINKHREF
in interface IHTMLHandler
ManifoldCFException
public void noteIMGSRC(String rawURL) throws ManifoldCFException
noteIMGSRC
in interface IHTMLHandler
ManifoldCFException
public void noteFRAMESRC(String rawURL) throws ManifoldCFException
noteFRAMESRC
in interface IHTMLHandler
ManifoldCFException
public void finishUp() throws ManifoldCFException
IHTMLHandler
finishUp
in interface IHTMLHandler
ManifoldCFException