Regexp result template
Core v2.2.0
The <regexp-result> processor defines the output format for regex matches using capture group references.
It must be used as a child element of <regexp>. It supports ${group0}, ${group1}, etc. for accessing matched groups.
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!--
    Link extraction example.
    The pattern captures an anchor's href value as group 1 and its link text
    as group 2; the result template arranges them as "URL: ..., Text: ...".
    The text to search is read from the "html" variable.
  -->
  <regexp>
    <regexp-pattern><![CDATA[<a href="([^"]+)">([^<]+)</a>]]></regexp-pattern>
    <regexp-source>${html}</regexp-source>
    <regexp-result>URL: ${group1}, Text: ${group2}</regexp-result>
  </regexp>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!--
    Phone number reformatting example.
    Groups 1-3 capture the three digit runs of a dash-separated number and the
    result template rewrites them as "(AAA) BBB-CCCC".
    CDATA content is taken literally, so regex escapes are written with a
    single backslash (\d); a doubled backslash would match a literal backslash
    followed by the letter "d" and never match the digits in the source text.
  -->
  <regexp>
    <regexp-pattern><![CDATA[(\d{3})-(\d{3})-(\d{4})]]></regexp-pattern>
    <regexp-source>Call 555-123-4567 for support</regexp-source>
    <regexp-result>(${group1}) ${group2}-${group3}</regexp-result>
  </regexp>
  <!-- Output: (555) 123-4567 -->
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!--
    Email decomposition example.
    Group 1 is the local part, group 2 the domain name up to its final dot and
    group 3 the top-level domain; the template prints the user and rejoins the
    domain pieces with a dot.
    CDATA content is taken literally, so the literal dot is escaped with a
    single backslash (\.); a doubled backslash would require a backslash
    character in the input and the address below would not match.
  -->
  <regexp>
    <regexp-pattern><![CDATA[([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+)\.([a-zA-Z]{2,})]]></regexp-pattern>
    <regexp-source>Contact: john.doe@example.com</regexp-source>
    <regexp-result>User: ${group1}, Domain: ${group2}.${group3}</regexp-result>
  </regexp>
  <!-- Output: User: john.doe, Domain: example.com -->
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!--
    Structured result example.
    The pattern reads the data-id, data-name and data-price attributes of a
    product div (groups 1-3) from the "productHtml" variable, and the result
    template wraps them in a product element.
    CDATA content is taken literally, so the digit class is written with a
    single backslash (\d); with a doubled backslash the pattern would look for
    a literal backslash and never match numeric ids or prices.
    NOTE(review): the template children are indented here for readability;
    confirm the processor does not emit that inter-element whitespace verbatim.
  -->
  <regexp>
    <regexp-pattern><![CDATA[<div class="product" data-id="(\d+)" data-name="([^"]+)" data-price="([\d.]+)">]]></regexp-pattern>
    <regexp-source>${productHtml}</regexp-source>
    <regexp-result>
      <product>
        <id>${group1}</id>
        <name>${group2}</name>
        <price>${group3}</price>
      </product>
    </regexp-result>
  </regexp>
</config>
element