Regexp source text definition
Core v2.2.0
The regexp-source processor supplies the text that the regular expression is matched against.
Must be used as a child element of <regexp>. Supports variable substitution and complex text sources (for example, the output of nested processors).
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: matching a pattern against literal text given inline in regexp-source. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!--
    The pattern describes three digits, a dash, three digits, a dash and four digits.
    CDATA content is passed through verbatim, so each regex escape takes a SINGLE
    backslash; a doubled backslash would be read by the regex engine as an escaped
    literal backslash and the digits would never match.
  -->
  <regexp>
    <regexp-pattern><![CDATA[\d{3}-\d{3}-\d{4}]]></regexp-pattern>
    <!-- Kept on one line so no indentation whitespace is added to the source text. -->
    <regexp-source>Call us at 555-123-4567 or 800-555-0199</regexp-source>
  </regexp>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: using a previously defined variable as the regexp source text. -->
<config xmlns="http://org.webharvest/schema/2.1/core">

  <!-- Defines the variable htmlContent from the nested http processor. -->
  <def var="htmlContent">
    <http url="https://example.com"/>
  </def>

  <!--
    The pattern looks for an opening anchor tag whose only attribute is href and
    captures the quoted href value in group 1. The source text is supplied through
    the ${htmlContent} variable reference.
  -->
  <regexp>
    <regexp-pattern><![CDATA[<a href="([^"]+)">]]></regexp-pattern>
    <regexp-source>${htmlContent}</regexp-source>
  </regexp>

</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: matching against text read from a file and held in a variable. -->
<config xmlns="http://org.webharvest/schema/2.1/core">

  <!-- Defines the variable rawText from the nested file processor (path data.txt, action read). -->
  <def var="rawText">
    <file path="data.txt" action="read"/>
  </def>

  <!--
    The pattern expects the literal prefix "Email: " and captures the address that
    follows in group 1. The dot before the top-level domain is escaped with a SINGLE
    backslash: CDATA is not unescaped, so a doubled backslash would make the regex
    require a literal backslash character in the address.
  -->
  <regexp>
    <regexp-pattern><![CDATA[Email: ([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})]]></regexp-pattern>
    <regexp-source>${rawText}</regexp-source>
  </regexp>

</config>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Example: using nested processors, rather than literal text, as the regexp source. -->
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!--
    The pattern looks for a div with class "price" containing a dollar sign followed
    by an amount (digits and commas, a dot, then two digits); the amount without the
    dollar sign is captured in group 1.
    Regex escapes use a SINGLE backslash because CDATA content is taken verbatim:
    with doubled backslashes the dollar sign would act as an end anchor preceded by
    a literal backslash, and the pattern could never match a price.
  -->
  <regexp>
    <regexp-pattern><![CDATA[<div class="price">\$([\d,]+\.\d{2})</div>]]></regexp-pattern>
    <!-- The source text is the result of html-to-xml applied to the fetched page. -->
    <regexp-source>
      <html-to-xml>
        <http url="https://shop.example.com/product/123"/>
      </html-to-xml>
    </regexp-source>
  </regexp>
</config>
element