org.webharvest.runtime
Class Scraper

java.lang.Object
  extended by org.webharvest.runtime.Scraper

public class Scraper
extends java.lang.Object

Basic runtime class.


Field Summary
static int STATUS_ERROR
           
static int STATUS_EXIT
           
static int STATUS_FINISHED
           
static int STATUS_PAUSED
           
static int STATUS_READY
           
static int STATUS_RUNNING
           
static int STATUS_STOPPED
           
 
Constructor Summary
Scraper(ScraperConfiguration configuration, java.lang.String workingDir)
          Constructor.
 
Method Summary
 void addFunctionParam(java.lang.String name, Variable value)
           
 void addRunningFunction(CallProcessor callProcessor)
           
 void addRuntimeListener(ScraperRuntimeListener listener)
           
 void addVariablesToContext(java.util.Map map)
          Add all map values to the context.
 void addVariableToContext(java.lang.String name, java.lang.Object value)
          Adds parameter with specified name and value to the context.
 void clearFunctionParams()
           
 void continueExecution()
           
 void dispose()
           
 void execute()
           
 Variable execute(java.util.List<IElementDef> ops)
           
 void exitExecution(java.lang.String message)
           
 void finishExecutingProcessor()
           
 ScraperConfiguration getConfiguration()
           
 java.sql.Connection getConnection(java.lang.String jdbc, java.lang.String connection, java.lang.String username, java.lang.String password)
          Gets a connection from the connection pool, first creating one if necessary.
 ScraperContext getContext()
           
 java.util.Map getFunctionParams()
           
 HttpClientManager getHttpClientManager()
           
 org.apache.log4j.Logger getLogger()
           
 java.lang.String getMessage()
           
 BaseProcessor getParentRunningProcessor(BaseProcessor processor)
           
 CallProcessor getRunningFunction()
           
 HttpProcessor getRunningHttpProcessor()
           
 int getRunningLevel()
           
 BaseProcessor getRunningProcessor()
           
 BaseProcessor getRunningProcessorOfType(java.lang.Class processorClazz)
           
 RuntimeConfig getRuntimeConfig()
           
 org.webharvest.runtime.scripting.ScriptEngine getScriptEngine()
           
 org.webharvest.runtime.scripting.ScriptEngine getScriptEngine(java.lang.String engineType)
           
 int getStatus()
           
 java.lang.String getWorkingDir()
           
 void informListenersAboutError(java.lang.Exception e)
          Informs all scraper listeners that an error has occurred during scraper execution.
 boolean isDebugMode()
           
 void pauseExecution()
           
 void processorFinishedExecution(BaseProcessor processor, java.util.Map properties)
           
 void releaseDBConnections()
          Releases all DB connections from the pool.
 void removeRunningFunction()
           
 void removeRunningHttpProcessor()
           
 void removeRuntimeListener(ScraperRuntimeListener listener)
           
 void setDebug(boolean debug)
           
 void setExecutingProcessor(BaseProcessor processor)
           
 void setRunningHttpProcessor(HttpProcessor httpProcessor)
           
 void stopExecution()
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

STATUS_READY

public static final int STATUS_READY
See Also:
Constant Field Values

STATUS_RUNNING

public static final int STATUS_RUNNING
See Also:
Constant Field Values

STATUS_PAUSED

public static final int STATUS_PAUSED
See Also:
Constant Field Values

STATUS_FINISHED

public static final int STATUS_FINISHED
See Also:
Constant Field Values

STATUS_STOPPED

public static final int STATUS_STOPPED
See Also:
Constant Field Values

STATUS_ERROR

public static final int STATUS_ERROR
See Also:
Constant Field Values

STATUS_EXIT

public static final int STATUS_EXIT
See Also:
Constant Field Values
Constructor Detail

Scraper

public Scraper(ScraperConfiguration configuration,
               java.lang.String workingDir)
Constructor.

Parameters:
configuration -
workingDir -
Method Detail

addVariableToContext

public void addVariableToContext(java.lang.String name,
                                 java.lang.Object value)
Adds parameter with specified name and value to the context. This way some predefined variables can be put in runtime context before execution starts.

Parameters:
name -
value -

addVariablesToContext

public void addVariablesToContext(java.util.Map map)
Add all map values to the context.

Parameters:
map -

execute

public Variable execute(java.util.List<IElementDef> ops)

execute

public void execute()

getContext

public ScraperContext getContext()

getConfiguration

public ScraperConfiguration getConfiguration()

getWorkingDir

public java.lang.String getWorkingDir()

getHttpClientManager

public HttpClientManager getHttpClientManager()

addRunningFunction

public void addRunningFunction(CallProcessor callProcessor)

getRunningFunction

public CallProcessor getRunningFunction()

clearFunctionParams

public void clearFunctionParams()

addFunctionParam

public void addFunctionParam(java.lang.String name,
                             Variable value)

getFunctionParams

public java.util.Map getFunctionParams()

removeRunningFunction

public void removeRunningFunction()

getRunningHttpProcessor

public HttpProcessor getRunningHttpProcessor()

setRunningHttpProcessor

public void setRunningHttpProcessor(HttpProcessor httpProcessor)

removeRunningHttpProcessor

public void removeRunningHttpProcessor()

getRunningLevel

public int getRunningLevel()

isDebugMode

public boolean isDebugMode()

setDebug

public void setDebug(boolean debug)

getScriptEngine

public org.webharvest.runtime.scripting.ScriptEngine getScriptEngine()

getScriptEngine

public org.webharvest.runtime.scripting.ScriptEngine getScriptEngine(java.lang.String engineType)

getLogger

public org.apache.log4j.Logger getLogger()

getRunningProcessor

public BaseProcessor getRunningProcessor()

getParentRunningProcessor

public BaseProcessor getParentRunningProcessor(BaseProcessor processor)
Parameters:
processor - Processor whose parent is needed.
Returns:
Parent running processor of the specified running processor, or null if the processor is not currently running or if it is the top running processor.

getRunningProcessorOfType

public BaseProcessor getRunningProcessorOfType(java.lang.Class processorClazz)
Parameters:
processorClazz - Class of enclosing running processor.
Returns:
Parent running processor in the tree of the specified class, or null if it doesn't exist.

getRuntimeConfig

public RuntimeConfig getRuntimeConfig()

getConnection

public java.sql.Connection getConnection(java.lang.String jdbc,
                                         java.lang.String connection,
                                         java.lang.String username,
                                         java.lang.String password)
Gets a connection from the connection pool, first creating one if necessary.

Parameters:
jdbc - Name of JDBC class
connection - JDBC connection string
username - Username
password - Password
Returns:
JDBC connection used to access database

setExecutingProcessor

public void setExecutingProcessor(BaseProcessor processor)

finishExecutingProcessor

public void finishExecutingProcessor()

processorFinishedExecution

public void processorFinishedExecution(BaseProcessor processor,
                                       java.util.Map properties)

addRuntimeListener

public void addRuntimeListener(ScraperRuntimeListener listener)

removeRuntimeListener

public void removeRuntimeListener(ScraperRuntimeListener listener)

getStatus

public int getStatus()

stopExecution

public void stopExecution()

exitExecution

public void exitExecution(java.lang.String message)

getMessage

public java.lang.String getMessage()

pauseExecution

public void pauseExecution()

continueExecution

public void continueExecution()

informListenersAboutError

public void informListenersAboutError(java.lang.Exception e)
Informs all scraper listeners that an error has occurred during scraper execution.


releaseDBConnections

public void releaseDBConnections()
Releases all DB connections from the pool.


dispose

public void dispose()