Home | SourceForge | Forums | Contact

External configuration file functions.xml

This configuration file contains the user-defined function download-multipage-list, whose purpose is to collect items from a series of pages connected by a typical "Next" button.

<?xml version="1.0" encoding="UTF-8"?>
 
<config>
    <!--
        Download a multi-page list of items.

        Starting at pageUrl, every loop iteration downloads the current page,
        converts it to XML, evaluates nextXPath on it to find the link to the
        following page, and evaluates itemXPath on it to collect the items of
        the current page. The loop stops when pageUrl becomes an empty string
        or when maxloops iterations have been performed.

        @param pageUrl       - URL of starting page
        @param itemXPath     - XPath expression to obtain single item in the list
        @param nextXPath     - XPath expression to URL for the next page
        @param maxloops      - maximum number of pages downloaded
                               (passed straight to the maxloops attribute of the loop)

        @return list of all downloaded items
     -->
    <function name="download-multipage-list">
        <return>
            <!-- Keep paging while there is a non-empty page URL, capped by maxloops.
                 The index variable "i" is declared but not referenced in this body. -->
            <while condition="${pageUrl.toString().length() != 0}" maxloops="${maxloops}" index="i">
                <!-- Bookkeeping steps are wrapped in <empty>; presumably this keeps their
                     values out of the loop result so that only the item XPath below
                     contributes to the returned list. TODO confirm against the
                     Web-Harvest documentation of the empty processor. -->
                <empty>
                    <!-- Fetch the current page and convert its HTML to XML so that
                         XPath expressions can be evaluated on it. -->
                    <var-def name="content">
                        <html-to-xml>
                            <http url="${pageUrl}"/>
                        </html-to-xml>
                    </var-def>
 
                    <!-- Link to the next page as found on the current page. -->
                    <var-def name="nextLinkUrl">
                        <xpath expression="${nextXPath}">
                            <var name="content"/>
                        </xpath>
                    </var-def>
 
                    <!-- Overwrite pageUrl with the next page's URL, built from the current
                         page URL and the extracted link via sys.fullUrl.
                         NOTE(review): the loop condition only ends the loop early if this
                         yields an empty string when no next link was found; verify what
                         sys.fullUrl returns for an empty link. -->
                    <var-def name="pageUrl">
                        <template>${sys.fullUrl(pageUrl.toString(), nextLinkUrl.toString())}</template>
                    </var-def>
                </empty>
    
                <!-- Items of the page downloaded in this iteration; "content" still holds
                     that page even though pageUrl already points to the next one. -->
                <xpath expression="${itemXPath}">
                    <var name="content"/>
                </xpath>
            </while>
        </return>
    </function>
</config>