Loop iterator processor
Core v2.2.0
The `<loop>` processor is fundamental for iterating over collections of data,
such as product lists, search results, paginated APIs, or any repeating data structure.
It executes its body content once for each item, with optional index tracking and filtering.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: iterate over the product nodes of a fetched page and write one JSON file per product. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- The current element is bound to ${product} and the iteration index to ${i}. -->
<!-- NOTE(review): maxloops="50" presumably caps the loop at 50 iterations; confirm against the processor reference. -->
<loop item="product" index="i" maxloops="50">
<!-- List to iterate: every div with class "product" in the downloaded page, after HTML is converted to XML. -->
<xpath expression="//div[@class='product']">
<html-to-xml>
<http url="https://example.com/products"/>
</html-to-xml>
</xpath>
<!-- Extract product details -->
<!-- Both XPath expressions are relative (leading ".") and are evaluated against the current ${product} node. -->
<def var="name">
<xpath expression=".//h3/text()">${product}</xpath>
</def>
<def var="price">
<xpath expression=".//span[@class='price']/text()">${product}</xpath>
</def>
<!-- Save product -->
<!-- The loop index is part of the file name, so each iteration writes its own file. -->
<!-- NOTE(review): ${name} and ${price} are inserted into the JSON text as-is; a value containing a double quote would yield invalid JSON. -->
<file path="products/product-${i}.json" action="write">
{"name": "${name}", "price": "${price}"}
</file>
</loop>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: iterate over a list of URLs held in a variable and fetch each one. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Three URLs, one per line, stored in the variable "urls". -->
<def var="urls">
http://example.com/page1
http://example.com/page2
http://example.com/page3
</def>
<!-- The value of ${urls} is the list to iterate; the current entry is bound to ${url}. -->
<!-- NOTE(review): no delimiter attribute is given here; presumably the multi-line value is split on line breaks. Confirm. -->
<loop item="url">
${urls}
<http url="${url}"/>
</loop>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: iterate over a comma-separated list of page numbers, download each page and save it to its own file. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Page numbers 1 to 10 as a single comma-separated string. -->
<def var="pageNumbers">1,2,3,4,5,6,7,8,9,10</def>
<!-- delimiter="," matches the separator used in ${pageNumbers}; the current number is bound to ${pageNum}. -->
<!-- The index variable "i" is declared but not referenced in this loop body. -->
<loop item="pageNum" index="i" delimiter=",">
${pageNumbers}
<!-- Download the page for the current page number. -->
<def var="pageContent">
<http url="https://example.com/products?page=${pageNum}"/>
</def>
<!-- Store the downloaded content under a file name derived from the page number. -->
<file path="pages/page-${pageNum}.html" action="write">
${pageContent}
</file>
</loop>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: log every list item together with its loop index and keep a running count that is written to a summary file after the loop. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Running counter, initialised before the loop. -->
<def var="totalCount" value="0"/>
<loop item="item" index="idx" maxloops="100">
<!-- List to iterate: all li elements of the fetched page, after HTML is converted to XML. -->
<xpath expression="//li">
<html-to-xml>
<http url="https://example.com/list"/>
</html-to-xml>
</xpath>
<!-- Track iteration number -->
<file path="scraping.log" action="append">
Processing item ${idx}: ${item}
</file>
<!-- Update counter: convert the current value to a number and add 1. -->
<!-- Appending a literal "1" to the value would concatenate text (0, 01, 011, ...) instead of counting. -->
<!-- NOTE(review): confirm that a def inside the loop body rebinds the outer totalCount rather than creating a loop-local variable. -->
<def var="totalCount" value="${totalCount.toInt() + 1}"/>
</loop>
<!-- Final count -->
<file path="summary.txt" action="write">
Processed ${totalCount} items
</file>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: iterate over all link targets of a page, using the "empty" attribute of the loop, and fetch each one. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- NOTE(review): empty="true" combined with the comment "Only non-empty links are processed" below looks contradictory; confirm the meaning of this attribute in the processor reference. -->
<loop item="link" empty="true">
<!-- List to iterate: the href attribute of every anchor in the fetched page. -->
<xpath expression="//a/@href">
<html-to-xml>
<http url="https://example.com/links"/>
</html-to-xml>
</xpath>
<!-- Only non-empty links are processed -->
<http url="${link}"/>
</loop>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: nested loops. The outer loop walks over products, the inner loop over the variants of the current product. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Outer loop: current product node in ${product}, outer index in ${i}. -->
<loop item="product" index="i">
<!-- List to iterate: every div with class "product" in the fetched page. -->
<xpath expression="//div[@class='product']">
<html-to-xml>
<http url="https://example.com/products"/>
</html-to-xml>
</xpath>
<!-- Product name, read once per product and reused for every variant below. -->
<def var="productName">
<xpath expression=".//h3/text()">${product}</xpath>
</def>
<!-- Nested loop for product variants -->
<!-- Inner loop: current variant node in ${variant}, inner index in ${j}. -->
<loop item="variant" index="j">
<!-- List to iterate: the variant divs found inside the current ${product} node. -->
<xpath expression=".//div[@class='variant']">${product}</xpath>
<def var="size">
<xpath expression=".//span[@class='size']/text()">${variant}</xpath>
</def>
<!-- Both indices are part of the file name, giving one file per product and variant combination. -->
<file path="products/product-${i}-variant-${j}.txt" action="write">
${productName} - Size: ${size}
</file>
</loop>
</loop>
</config>
The `item` and `index` variables are local to the loop body.