File I/O processor
Core v2.2.0
The `<file>` processor provides complete file system operations for both
text and binary files. It supports reading files, writing output, appending data,
and listing directory contents with filtering.
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Read config.txt; the surrounding <def> stores the result in the "config" variable. -->
  <def var="config">
    <file action="read" path="config.txt"/>
  </def>
  <!-- From this point on, the file content can be referenced as ${config}. -->
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Write the literal two-line text body to output.txt.
       The body and the closing tag are deliberately kept at column 0 so that
       no indentation whitespace becomes part of the element's text content. -->
  <file action="write" path="output.txt">
This is the file content
with multiple lines
</file>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Fetch https://example.com and keep the response in the "htmlPage" variable. -->
  <def var="htmlPage">
    <http url="https://example.com"/>
  </def>
  <!-- Save the fetched page as page.html with charset UTF-8.
       The template line and the closing tag stay at column 0 so that no
       indentation whitespace is added around the page content. -->
  <file charset="UTF-8" action="write" path="page.html">
${htmlPage}
</file>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- The nested <http> request supplies the content written to data/products.json;
       the request sends an "Accept: application/json" header. -->
  <file action="write" path="data/products.json">
    <http url="https://api.example.com/products">
      <http-header name="Accept">application/json</http-header>
    </http>
  </file>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Download a PDF and store it under downloads/ using type="binary". -->
  <file type="binary" action="write" path="downloads/document.pdf">
    <http url="https://example.com/report.pdf"/>
  </file>
  <!-- The same pattern applies to images, ZIPs, executables, etc. -->
  <file type="binary" action="write" path="archive.zip">
    <http url="https://example.com/data.zip"/>
  </file>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Space-separated URLs to process. Defined before the loop so the loop can iterate over them. -->
  <def var="urls">http://example.com/page1 http://example.com/page2</def>
  <loop item="url" index="i">
    <!-- <list> supplies the items: the URL string is split on spaces, one item per URL. -->
    <list>
      <tokenize delimiters=" ">${urls}</tokenize>
    </list>
    <!-- <body> runs once per item, with the current URL available as ${url}. -->
    <body>
      <def var="page">
        <http url="${url}"/>
      </def>
      <!-- Append one entry per processed URL to scraping.log.
           The text body and closing tag stay at column 0 so that no indentation
           whitespace becomes part of the appended text.
           NOTE(review): "_date" is not defined anywhere in this example; confirm
           that the runtime provides it. -->
      <file path="scraping.log" action="append">
Processed ${url} at ${_date}
</file>
    </body>
  </loop>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- List all files in the downloads/ directory and keep the result in "fileList". -->
  <def var="fileList">
    <file path="downloads/" action="list"/>
  </def>
  <!-- Process each listed file. -->
  <loop item="filename">
    <!-- <list> supplies the items to iterate over. The variable is fetched with <get>
         rather than a ${...} text template so that it is passed on as a list. -->
    <list>
      <get var="fileList"/>
    </list>
    <!-- <body> runs once per item.
         NOTE(review): confirm whether action="list" yields bare file names or full
         paths; the "downloads/" prefix below assumes bare names. -->
    <body>
      <file path="downloads/${filename}" action="read"/>
    </body>
  </loop>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Store the listing of configs/ entries matching *.xml in "xmlFiles"
       (listfiles="true", listdirs="false"). -->
  <def var="xmlFiles">
    <file action="list" path="configs/" listfilter="*.xml" listfiles="true" listdirs="false"/>
  </def>
</config>
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Store the recursive listing of src/ entries matching *.java in "javaFiles". -->
  <def var="javaFiles">
    <file action="list" path="src/" listrecursive="true" listfilter="*.java"/>
  </def>
</config>
Use `type="binary"` for images, ZIPs, and PDFs to avoid corruption.