<file>

File I/O processor

Core v2.2.0

Overview

The file processor reads, writes, and appends to files in either text or binary mode, and lists directory contents with optional wildcard filtering and recursion.

Usage Examples

Example 1: Read text file

example-1.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Read config.txt and keep the result in the "config" variable -->
  <def var="config">
    <file action="read" path="config.txt"/>
  </def>

  <!-- From this point on, the file content can be referenced as ${config} -->
</config>

Example 2: Write text file

example-2.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Write the text between the <file> tags to output.txt -->
<file path="output.txt" action="write">
  This is the file content
  with multiple lines
</file>
</config>

Example 3: Write HTML from HTTP response

example-3.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Fetch the page and keep the HTTP result in the "htmlPage" variable -->
<def var="htmlPage">
  <http url="https://example.com"/>
</def>

<!-- Write the stored page to page.html, with the charset set to UTF-8 -->
<file path="page.html" action="write" charset="UTF-8">
  ${htmlPage}
</file>
</config>

Example 4: Save JSON API response

example-4.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Request the product list with an Accept: application/json header
       and write the HTTP result to data/products.json -->
  <file action="write" path="data/products.json">
    <http url="https://api.example.com/products">
      <http-header name="Accept">application/json</http-header>
    </http>
  </file>
</config>

Example 5: Download binary file (image, PDF, ZIP)

example-5.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- type="binary" writes the HTTP result as a binary file -->
  <file type="binary" action="write" path="downloads/document.pdf">
    <http url="https://example.com/report.pdf"/>
  </file>

  <!-- The same pattern applies to images, ZIP archives, executables, etc. -->
  <file type="binary" action="write" path="archive.zip">
    <http url="https://example.com/data.zip"/>
  </file>
</config>

Example 6: Append to log file

example-6.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- Define the URLs before the loop: the loop needs its list before the
     first iteration, so the variable cannot be defined inside the loop body -->
<def var="urls">http://example.com/page1&#10;http://example.com/page2</def>

<loop item="url" index="i">
  <!-- Split the newline-separated text into one list item per URL -->
  <list>
    <tokenize delimiters="&#10;">${urls}</tokenize>
  </list>

  <body>
    <!-- Fetch the current URL -->
    <def var="page">
      <http url="${url}"/>
    </def>

    <!-- Append one entry per processed URL to the log -->
    <file path="scraping.log" action="append">
      Processed ${url} at ${_date}
    </file>
  </body>
</loop>
</config>

Example 7: List directory files

example-7.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
<!-- List all files in directory -->
<def var="fileList">
  <file path="downloads/" action="list"/>
</def>

<!-- Process each file: <list> supplies the items, <body> runs once per item -->
<loop item="filename">
  <list>${fileList}</list>

  <body>
    <!-- Read the file named by the current item -->
    <file path="downloads/${filename}" action="read"/>
  </body>
</loop>
</config>

Example 8: List with filter (wildcard pattern)

example-8.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Collect entries of configs/ that match *.xml; files are listed, directories are not -->
  <def var="xmlFiles">
    <file action="list" path="configs/" listfilter="*.xml" listfiles="true" listdirs="false"/>
  </def>
</config>

Example 9: Recursive directory listing

example-9.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
  <!-- Collect every *.java entry under src/, descending into subdirectories -->
  <def var="javaFiles">
    <file action="list" path="src/" listfilter="*.java" listrecursive="true"/>
  </def>
</config>

Important Notes

Related Processors