CSV splitting, caching, and age-based processing
Solutions for common file processing tasks: splitting CSV files, checking file age, and implementing simple caching.
Read and process CSV line by line
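Use <file> to read the CSV, then <tokenize> twice: once to split the content into lines, and once to split each line into fields. Note that splitting on a bare comma does not handle quoted fields that themselves contain commas.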
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">

    <!-- Step 1: load the whole of data.csv into the variable "csvContent". -->
    <def var="csvContent">
        <file path="data.csv" action="read"/>
    </def>

    <!-- Step 2: break the file content into a list of lines, stored in "lines".
         NOTE(review): the delimiter is written as the two characters backslash + n;
         confirm that the engine treats this as a newline escape. -->
    <def var="lines">
        <tokenize delimiters="\n">${csvContent}</tokenize>
    </def>

    <!-- Step 3: visit every line. "line" holds the current element, "i" its index. -->
    <loop item="line" index="i">
        ${lines}

        <!-- The body runs only when i > 0, which is meant to skip the header row.
             NOTE(review): this assumes the loop index starts at 0; confirm. -->
        <if condition="${i > 0}">

            <!-- Break the current line on commas; the pieces are stored in "fields". -->
            <def var="fields">
                <tokenize delimiters=",">${line}</tokenize>
            </def>

            <!-- Log the first two fields of the row. -->
            <log message="Field 1: ${fields[0]}, Field 2: ${fields[1]}"/>
        </if>
    </loop>
</config>
Conditional processing based on file modification time
Reuse a cached file only while it is younger than a maximum age; otherwise fetch fresh data and rewrite the cache.
<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://org.webharvest/schema/2.1/core">

    <!-- Cache settings: location of the cache file and the maximum age
         (in milliseconds) at which it still counts as fresh. -->
    <def var="cacheFile">cache/data.txt</def>
    <def var="maxAgeMs">7200000</def> <!-- 2 hours -->

    <!-- Decide whether the cache file is fresh (exists and is younger than maxAgeMs).
         The script is wrapped in a CDATA section because it contains a raw
         less-than operator, which is not allowed unescaped in XML element text.
         The last evaluated expression statement is the value the script is
         expected to yield into "isCacheFresh". -->
    <def var="isCacheFresh">
        <script><![CDATA[
            var file = new java.io.File(context.getVar('cacheFile').toString());
            if (!file.exists()) {
                false; // Not fresh: there is no cache file yet
            } else {
                // Age = current time minus the file's last-modified timestamp
                var ageMs = new Date().getTime() - file.lastModified();
                // Convert the variable to a string and parse it as base 10
                var maxAge = parseInt(context.getVar('maxAgeMs').toString(), 10);
                ageMs < maxAge; // Fresh if age is less than the maximum
            }
        ]]></script>
    </def>

    <if condition="${isCacheFresh}">
        <!-- Cache is fresh: read the cached content. -->
        <file path="${cacheFile}" action="read"/>
        <else>
            <!-- Cache is missing or stale: fetch fresh data and write it to the cache file. -->
            <def var="freshData">
                <http url="https://api.example.com/data"/>
            </def>
            <file path="${cacheFile}" action="write">${freshData}</file>
        </else>
    </if>
</config>