File Advanced
Patterns

CSV splitting, caching, and age-based processing

Solutions for common file processing tasks: splitting CSV files, checking file age, and implementing simple caching.

Pattern 1: Split CSV File

Read and process CSV line by line

Use <file> + <tokenize> to split CSV into lines and fields.

csv-split.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
    <!--
        Reads data.csv, splits it into lines, skips the first (header) line
        and logs the first two comma-separated fields of every other line.
        Lines are split on every comma, so quoted fields that contain commas
        are not handled by this simple pattern.
    -->

    <!-- Read the whole CSV file into a variable -->
    <def var="csvContent">
        <file path="data.csv" action="read"/>
    </def>

    <!--
        Split the content into a list of lines.
        The delimiter is written as the character reference for line feed
        (&#10;): inside an XML attribute the text "\n" is only a backslash
        followed by the letter n, not a newline character.
        NOTE(review): input with Windows (CRLF) line endings keeps a trailing
        carriage return on each line; if "delimiters" is treated as a set of
        characters, &#13; can be appended to strip it. Confirm with the
        engine's tokenize documentation.
    -->
    <def var="lines">
        <tokenize delimiters="&#10;">${csvContent}</tokenize>
    </def>

    <!-- Process each line; "line" holds the current line, "i" its index -->
    <loop item="line" index="i">
        ${lines}
        
        <!--
            Skip the header line.
            NOTE(review): this assumes the loop index starts at 0. If the
            engine counts from 1, the condition must be i > 1. TODO confirm.
        -->
        <if condition="${i > 0}">
            <!-- Split the current line into fields on commas -->
            <def var="fields">
                <tokenize delimiters=",">${line}</tokenize>
            </def>
            
            <!-- Access individual fields by position -->
            <log message="Field 1: ${fields[0]}, Field 2: ${fields[1]}"/>
        </if>
    </loop>
</config>

Pattern 2: Check File Age

Conditional processing based on file modification time

Reuse a cached file only if it was modified more recently than a maximum age; otherwise fetch fresh data and rewrite the cache.

file-age-check.xml
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">
    <!--
        Simple file-based cache: if the cache file exists and is younger than
        maxAgeMs, read it; otherwise fetch the data over HTTP and write it to
        the cache file.
    -->

    <!-- Cache location and maximum allowed age of the cache file -->
    <def var="cacheFile">cache/data.txt</def>
    <def var="maxAgeMs">7200000</def> <!-- 2 hours, in milliseconds -->

    <!--
        isCacheFresh: false when the cache file does not exist, otherwise
        whether the time since its last modification is below maxAgeMs.
        The script is wrapped in a CDATA section because it uses the
        less-than operator, which is not allowed unescaped in XML text.
    -->
    <def var="isCacheFresh">
        <script><![CDATA[
            var file = new java.io.File(context.getVar('cacheFile').toString());

            if (!file.exists()) {
                false; // Not fresh: there is no cache file yet
            } else {
                var ageMs = new Date().getTime() - file.lastModified();
                var maxAge = parseInt(context.getVar('maxAgeMs'), 10);
                ageMs < maxAge; // Fresh if age is less than the maximum
            }
        ]]></script>
    </def>

    <if condition="${isCacheFresh}">
        <!-- Cache is fresh: use it -->
        <file path="${cacheFile}" action="read"/>
        <else>
            <!-- Cache is missing or stale: fetch fresh data and cache it -->
            <def var="freshData">
                <http url="https://api.example.com/data"/>
            </def>
            <file path="${cacheFile}" action="write">${freshData}</file>
        </else>
    </if>
</config>

Related Resources