Function definition processor
Core v2.2.0
The `<function>` processor defines named, reusable functions that can be
called multiple times from different parts of the configuration using `<call>`.
Functions support parameters, local variables, and return values, enabling
modular and maintainable scraper configurations.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: a function without parameters, invoked through <call>. -->
<config xmlns="http://org.webharvest/schema/2.1/core">

    <!-- getCurrentDate: the body consists of the single expression ${_date}. -->
    <function name="getCurrentDate">
        ${_date}
    </function>

    <!-- Assign the result of calling getCurrentDate to the variable "timestamp". -->
    <def var="timestamp">
        <call name="getCurrentDate"/>
    </def>

</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: one function called twice, each time with a different "productId". -->
<config xmlns="http://org.webharvest/schema/2.1/core">

    <!-- fetchProduct: issues an HTTP request to the product endpoint for productId. -->
    <function name="fetchProduct">
        <!-- Bind the "productId" call parameter to a local variable. -->
        <def var="productId">
            <call-param name="productId"/>
        </def>
        <http url="https://api.example.com/product/${productId}"/>
    </function>

    <!-- Call with different IDs -->
    <def var="product1">
        <call name="fetchProduct">
            <call-param name="productId">123</call-param>
        </call>
    </def>
    <def var="product2">
        <call name="fetchProduct">
            <call-param name="productId">456</call-param>
        </call>
    </def>

</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: a function with local variables, called once per URL inside a loop. -->
<config xmlns="http://org.webharvest/schema/2.1/core">

    <!--
        extractProductInfo: takes the "html" call parameter, evaluates two XPath
        expressions against it (title and price), and ends with a template that
        joins both values as "name|price".
    -->
    <function name="extractProductInfo">
        <!-- Bind the "html" call parameter to a local variable. -->
        <def var="html"><call-param name="html"/></def>

        <!-- Text of the h1 element with class "title". -->
        <def var="name">
            <xpath expression="//h1[@class='title']/text()">
                <html-to-xml>${html}</html-to-xml>
            </xpath>
        </def>

        <!-- Text of the span element with class "price". -->
        <def var="price">
            <xpath expression="//span[@class='price']/text()">
                <html-to-xml>${html}</html-to-xml>
            </xpath>
        </def>

        <template>${name}|${price}</template>
    </function>

    <!--
        Download every URL and run extractProductInfo on the page.
        The loop processor takes the sequence to iterate over in <list> and the
        per-item work in <body>.
        NOTE(review): "urls" is not defined in this example; it is assumed to be
        set earlier in the configuration. Confirm before copying.
    -->
    <loop item="url">
        <list>
            <tokenize>${urls}</tokenize>
        </list>
        <body>
            <def var="page"><http url="${url}"/></def>
            <def var="info">
                <call name="extractProductInfo">
                    <call-param name="html">${page}</call-param>
                </call>
            </def>
        </body>
    </loop>

</config>
inside function body