<function>

Function definition processor

Core v2.2.0

Overview

The `<function>` processor defines named, reusable functions that can be called multiple times from different parts of the configuration using `<call>`. Functions support parameters, local variables, and return values, enabling modular and maintainable scraper configurations.

Usage Examples

Example 1: Simple function

example-1.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Defines a parameterless function named "getCurrentDate" whose body is the
     single expression ${_date}.
     NOTE(review): ${_date} is presumably a built-in variable holding the
     current date; confirm against the engine's variable reference. -->
<function name="getCurrentDate">
  ${_date}
</function>

<!-- Invokes the function by name and stores the result of the call in the
     variable "timestamp". -->
<def var="timestamp">
  <call name="getCurrentDate"/>
</def>
</config>

Example 2: Function with parameters

example-2.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- fetchProduct: binds the "productId" call parameter to a local variable
       of the same name, then requests the product URL built from it.
       NOTE(review): the <http> result is presumably what the function
       returns to the caller; confirm against the processor reference. -->
  <function name="fetchProduct">
    <def var="productId"><call-param name="productId"/></def>

    <http url="https://api.example.com/product/${productId}"/>
  </function>

  <!-- The same function is invoked twice, once per product ID; each result is
       stored in its own variable. -->
  <def var="product1">
    <call name="fetchProduct">
      <call-param name="productId">123</call-param>
    </call>
  </def>
  <def var="product2">
    <call name="fetchProduct">
      <call-param name="productId">456</call-param>
    </call>
  </def>
</config>

Example 3: Reusable extraction logic

example-3.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- extractProductInfo: takes raw page markup through the "html" call
       parameter, converts it with <html-to-xml>, and pulls out the product
       title and price via XPath. The final <template> joins both values
       with a "|" separator. -->
  <function name="extractProductInfo">
    <def var="html"><call-param name="html"/></def>

    <!-- Text of the <h1 class="title"> element. -->
    <def var="name">
      <xpath expression="//h1[@class='title']/text()">
        <html-to-xml>${html}</html-to-xml>
      </xpath>
    </def>

    <!-- Text of the <span class="price"> element. -->
    <def var="price">
      <xpath expression="//span[@class='price']/text()">
        <html-to-xml>${html}</html-to-xml>
      </xpath>
    </def>

    <template>${name}|${price}</template>
  </function>

  <!-- Iterates over the tokens of ${urls}: each page is downloaded and handed
       to extractProductInfo, with the result stored in "info".
       NOTE(review): "urls" is not defined anywhere in this example; it is
       presumably supplied from outside the configuration. Confirm. -->
  <loop item="url">
    <tokenize>${urls}</tokenize>

    <def var="page"><http url="${url}"/></def>
    <def var="info">
      <call name="extractProductInfo">
        <call-param name="html">${page}</call-param>
      </call>
    </def>
  </loop>
</config>

Important Notes

Related Processors