Track usage, enforce quotas, enable billing
Granular resource tracking for HTTP requests, bandwidth, CPU time, and memory with built-in support for quota enforcement and billing integration.
What tokens measure and why
Unit: Count
<http> call
Unit: Bytes
Unit: Milliseconds
Unit: Bytes
Core classes and usage patterns
Package: org.webharvest.runtime.tracking
/**
 * Counter of consumed resources ("tokens"), keyed by {@link ResourceType}.
 * In the usage examples an instance is obtained from a session via
 * {@code session.getTokenTracker()}.
 *
 * NOTE(review): only signatures are listed here; thread-safety and overflow
 * behavior are not visible in this listing - confirm against the implementation.
 */
public interface TokenTracker {
// Increment tokens
// Single-argument form: presumably adds one token of the given type - TODO confirm.
void increment(ResourceType type);
// Two-argument form: adds an explicit amount; presumably expressed in the unit
// reported by the type's getUnit() (e.g. bytes for HTTP_BYTES) - TODO confirm.
void increment(ResourceType type,
long amount);
// Query usage
// Current count for a single resource type.
long getTokenCount(ResourceType type);
// All counters at once, as a TokenUsage value.
TokenUsage getUsage(); // Snapshot
// Reset (for testing)
// No-argument form presumably clears every counter; the one-argument form
// presumably clears only the given type - TODO confirm.
void reset();
void reset(ResourceType type);
}
Package: org.webharvest.runtime.tracking
/**
 * Token counts for one tracker, as returned by {@code TokenTracker.getUsage()}.
 *
 * NOTE(review): this listing shows signatures only (no bodies, fields or constructor).
 * The reporting example on this page builds an aggregate with
 * {@code new TokenUsage(httpRequests, httpBytes, cpuTime, memoryPeak)} - confirm that
 * constructor's visibility and argument order against the real class.
 */
public final class TokenUsage {
// Accessors
// Raw counters; the method names indicate the units (request count, bytes,
// milliseconds, bytes).
long getHttpRequests();
long getHttpBytes();
long getCpuTimeMillis();
long getMemoryPeakBytes();
// Convenience
// The same counters scaled to larger units.
// NOTE(review): whether "GB" means 10^9 or 2^30 bytes is not visible here -
// confirm before using these values for billing.
double getHttpBytesGB();
double getCpuTimeHours();
double getMemoryPeakGB();
// Export
// The usage example on this page shows toJson() output shaped like
// {"httpRequests":...,"httpBytes":...,"cpuTime":...,"memoryPeak":...}.
String toJson();
// NOTE(review): raw Map - the key/value types are not shown (generic parameters
// appear to have been lost from this listing); presumably the same four entries
// as toJson() - TODO confirm.
Map toMap();
}
Package: org.webharvest.runtime.tracking
/**
 * Kinds of resource ("token") that can be counted by a tracker.
 *
 * <p>Each constant carries the dotted metric name used when exporting the counter
 * and the unit in which its count is expressed.
 */
public enum ResourceType {
    /** Number of HTTP requests, expressed as a plain count. */
    HTTP_REQUEST("http.request", "count"),
    /** HTTP traffic volume, expressed in bytes. */
    HTTP_BYTES("http.bytes", "bytes"),
    /** CPU time, expressed in milliseconds. */
    CPU_TIME("cpu.time", "milliseconds"),
    /** Peak memory, expressed in bytes. */
    MEMORY_PEAK("memory.peak", "bytes");

    private final String metricName;
    private final String unit;

    // The constants above pass (metricName, unit); without this constructor and the
    // backing fields the enum does not compile.
    ResourceType(String metricName, String unit) {
        this.metricName = metricName;
        this.unit = unit;
    }

    /**
     * @return the dotted metric name of this resource type, e.g. {@code "http.request"}
     */
    public String getMetricName() {
        return metricName;
    }

    /**
     * @return the unit of this resource type's counter, e.g. {@code "bytes"}
     */
    public String getUnit() {
        return unit;
    }
}
Tracking, quotas, and billing scenarios
// Run the scrape and block until the session has finished
ScraperSession session = service.executeAsync(config, client).get();
session.awaitCompletion();

// Take one snapshot of the session's counters
TokenTracker tracker = session.getTokenTracker();
TokenUsage usage = tracker.getUsage();

// Read each figure from the snapshot, then print the summary
long requestCount = usage.getHttpRequests();
double bandwidthGb = usage.getHttpBytesGB();
double cpuHours = usage.getCpuTimeHours();
double memoryPeakGb = usage.getMemoryPeakGB();
System.out.println("HTTP Requests: " + requestCount);
System.out.println("Bandwidth: " + bandwidthGb + " GB");
System.out.println("CPU Time: " + cpuHours + " hours");
System.out.println("Memory Peak: " + memoryPeakGb + " GB");

// Serialize the same snapshot as JSON, for example:
// {"httpRequests":1250,"httpBytes":47185920,"cpuTime":12500,"memoryPeak":268435456}
String json = usage.toJson();
// Define quota policy
/**
 * Pre-execution quota gate: sums a client's recorded usage across all of its
 * sessions and rejects further work once a limit has been reached.
 *
 * NOTE(review): the constructor that initializes the two limits and the
 * {@code registry} used below are not shown in this snippet.
 */
public class QuotaPolicy {
    /** Maximum total number of HTTP requests across all of a client's sessions. */
    private final long maxHttpRequests;
    /** Maximum total number of HTTP bytes across all of a client's sessions. */
    private final long maxHttpBytes;

    /**
     * Checks the client's accumulated usage against both limits.
     *
     * @param client the client whose sessions are looked up by client id
     * @param config the configuration about to be executed (not consulted by this check)
     * @return {@code true} when the client is below both limits; this method never
     *         returns {@code false}
     * @throws QuotaExceededException when the request total or the byte total has
     *         reached its limit
     */
    public boolean isWithinQuota(ClientContext client, Config config) {
        // Get client's historical usage
        List<ScraperSession> sessions =
            registry.getSessionsByClientId(client.getClientId());

        long totalRequests = 0;
        long totalBytes = 0;
        for (ScraperSession session : sessions) {
            TokenTracker tracker = session.getTokenTracker();
            if (tracker == null) {
                continue; // nothing recorded for this session
            }
            // One snapshot per session so both totals come from the same reading.
            TokenUsage usage = tracker.getUsage();
            totalRequests += usage.getHttpRequests();
            totalBytes += usage.getHttpBytes();
        }

        // Check quota
        if (totalRequests >= maxHttpRequests) {
            throw new QuotaExceededException(
                "HTTP request quota exceeded: " + totalRequests +
                "/" + maxHttpRequests);
        }
        // The byte limit is declared alongside the request limit, so enforce it too.
        if (totalBytes >= maxHttpBytes) {
            throw new QuotaExceededException(
                "HTTP bandwidth quota exceeded: " + totalBytes +
                "/" + maxHttpBytes + " bytes");
        }
        return true;
    }
}
// Use before execution
if (quotaPolicy.isWithinQuota(client, config)) {
service.executeAsync(config, client);
} else {
System.err.println("Quota exceeded - upgrade plan or wait");
}
// Listen for completed sessions
/**
 * Bills a finished session: prices its token usage, creates one Stripe invoice
 * line item for the session's client, and prints the billed amount.
 *
 * NOTE(review): in Stripe's official Java client {@code InvoiceItem.create} declares
 * a checked exception - confirm how this handler should surface billing failures.
 *
 * @param event completion event carrying the id of the finished session
 */
@Subscribe
public void onSessionCompleted(SessionCompletedEvent event) {
    ScraperSession session = registry.getSession(event.getSessionId()).get();
    TokenUsage usage = session.getTokenTracker().getUsage();

    // Calculate cost
    double cost = 0.0;
    cost += usage.getHttpRequests() * 0.001; // $0.001 per request
    cost += usage.getHttpBytesGB() * 0.10;   // $0.10 per GB
    cost += usage.getCpuTimeHours() * 0.02;  // $0.02 per CPU-hour
    cost += usage.getMemoryPeakGB() * 0.01;  // $0.01 per GB of peak memory (not time-weighted)

    // Convert to whole cents by rounding to nearest. A plain (long) cast truncates,
    // so a cost such as 0.29 (stored as 0.28999...) would be billed as 28 cents.
    long amountCents = Math.round(cost * 100);

    // Create Stripe invoice line item
    InvoiceItemCreateParams params = InvoiceItemCreateParams.builder()
        .setCustomer(session.getClientId())
        .setAmount(amountCents)
        .setCurrency("usd")
        .setDescription(String.format(
            "WebHarvest Session %s: %d requests, %.2f GB",
            session.getSessionId(),
            usage.getHttpRequests(),
            usage.getHttpBytesGB()))
        .putMetadata("sessionId", session.getSessionId())
        .putMetadata("httpRequests", String.valueOf(usage.getHttpRequests()))
        .putMetadata("httpBytes", String.valueOf(usage.getHttpBytes()))
        .build();
    InvoiceItem.create(params);

    System.out.printf("Billed client %s: $%.4f%n",
        session.getClientId(), cost);
}
// Generate monthly usage report
public class UsageReporter {
public Map getMonthlyUsageByClient(
YearMonth month) {
Map> sessionsByClient =
registry.getAllSessions().stream()
.filter(s -> isInMonth(s, month))
.collect(Collectors.groupingBy(
ScraperSession::getClientId));
Map aggregated = new HashMap<>();
for (Map.Entry> entry :
sessionsByClient.entrySet()) {
String clientId = entry.getKey();
List sessions = entry.getValue();
// Sum all tokens for this client
long httpRequests = 0;
long httpBytes = 0;
long cpuTime = 0;
long memoryPeak = 0;
for (ScraperSession session : sessions) {
TokenUsage usage = session.getTokenTracker().getUsage();
httpRequests += usage.getHttpRequests();
httpBytes += usage.getHttpBytes();
cpuTime += usage.getCpuTimeMillis();
memoryPeak = Math.max(memoryPeak, usage.getMemoryPeakBytes());
}
TokenUsage total = new TokenUsage(
httpRequests, httpBytes, cpuTime, memoryPeak);
aggregated.put(clientId, total);
}
return aggregated;
}
}
// Generate report
// The map must be typed: with a raw Map the lambda's `tokens` parameter is an Object
// and the TokenUsage getters below cannot be called on it.
Map<String, TokenUsage> usage = reporter.getMonthlyUsageByClient(
    YearMonth.of(2025, 10));
usage.forEach((client, tokens) -> {
    System.out.printf("Client: %s%n", client);
    System.out.printf(" HTTP Requests: %,d%n", tokens.getHttpRequests());
    System.out.printf(" Bandwidth: %.2f GB%n", tokens.getHttpBytesGB());
    System.out.printf(" CPU Time: %.2f hours%n", tokens.getCpuTimeHours());
});
// Monitor token usage during execution
/**
 * Watches a running session's HTTP request count and cancels the session once a
 * fixed request quota is reached.
 */
public class TokenMonitor {
    private final long quotaLimit = 10_000; // 10k requests

    /**
     * Starts a background thread that polls the session once per second until the
     * session reaches a terminal status, the quota is hit, or the thread is interrupted.
     * A warning is printed once when usage passes 80% of the quota.
     *
     * @param session the session to watch; it is cancelled when the quota is reached
     */
    public void monitorSession(ScraperSession session) {
        new Thread(() -> {
            boolean warned = false; // print the 80% warning once, not on every poll
            while (!session.getStatus().isTerminal()) {
                TokenTracker tracker = session.getTokenTracker();
                long requests = tracker.getTokenCount(
                    ResourceType.HTTP_REQUEST);

                // Check approaching limit
                if (!warned && requests > quotaLimit * 0.8) {
                    System.err.printf(
                        "WARNING: 80%% quota used (%d/%d)%n",
                        requests, quotaLimit);
                    warned = true;
                }

                // Hard limit
                if (requests >= quotaLimit) {
                    System.err.println(
                        "QUOTA EXCEEDED - Cancelling session");
                    session.cancel();
                    break;
                }

                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Thread.sleep throws a checked exception, which a Runnable cannot
                    // propagate. Restore the interrupt flag and stop monitoring.
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }).start();
    }
}
// Export to Prometheus metrics
/**
 * Publishes a completed session's token usage to Prometheus, labelled by client id:
 * requests, bytes and CPU milliseconds go to counters; memory peak and session
 * duration go to gauges.
 *
 * @param event completion event carrying the id of the finished session
 */
@Subscribe
public void onSessionCompleted(SessionCompletedEvent event) {
    ScraperSession session = registry.getSession(event.getSessionId()).get();
    TokenUsage usage = session.getTokenTracker().getUsage();
    String clientId = session.getClientId();

    // Read the snapshot values up front
    long requestCount = usage.getHttpRequests();
    long byteCount = usage.getHttpBytes();
    long cpuMillis = usage.getCpuTimeMillis();
    long peakBytes = usage.getMemoryPeakBytes();

    // Counters accumulate across sessions
    httpRequestsTotal.labels(clientId).inc(requestCount);
    httpBytesTotal.labels(clientId).inc(byteCount);
    cpuMillisecondsTotal.labels(clientId).inc(cpuMillis);

    // Gauges hold the most recent session's value
    memoryPeakBytes.labels(clientId).set(peakBytes);
    sessionDurationSeconds.labels(clientId).set(
        session.getMetrics().getDuration().getSeconds());
}
// Export to DataDog
// Tag every metric with the client, session and final status. All tag keys and
// values are strings, so the map is typed rather than raw.
// NOTE(review): assumes the statsd wrapper in use accepts a Map of tags - confirm
// against the client library's method signatures.
Map<String, String> tags = new HashMap<>();
tags.put("client_id", session.getClientId());
tags.put("session_id", session.getSessionId());
tags.put("status", session.getStatus().name());
statsd.count("webharvest.http.requests", usage.getHttpRequests(), tags);
statsd.gauge("webharvest.http.bytes", usage.getHttpBytes(), tags);
statsd.histogram("webharvest.cpu.time", usage.getCpuTimeMillis(), tags);
Real-world pricing models and implementations
Note: This section demonstrates the technical capabilities of WebHarvest's token tracking system for billing and quota enforcement. The code is production-ready and can be integrated into cloud platforms. However, this is not a commercial service offering - WebHarvest is an open-source project. These examples show how you can build your own pricing models if deploying WebHarvest as a hosted service.
Bill through AWS Marketplace with automatic metering:
Monthly subscriptions with usage-based billing:
Track costs per project/department:
Connect token tracking to external systems