Production-grade execution tracking and lifecycle management
UUID-based session tracking, thread-safe concurrent execution, real-time metrics, and event-driven monitoring for enterprise deployments.
What is the Session Management API?
State transitions and event flow
API classes and interfaces
Package: org.webharvest.runtime.session
/**
 * Handle to a single scraper execution. Exposes the session's identity, its
 * current state, a cancellation hook, and blocking waits for completion.
 */
public interface ScraperSession {

    // ---- Identification ----

    /** Returns the identifier of this session. */
    String getSessionId();

    /** Returns the identifier of the client associated with this session. */
    String getClientId();

    // ---- State ----

    /** Returns the current lifecycle status of this session. */
    SessionStatus getStatus();

    /** Returns the metrics object of this session. */
    SessionMetrics getMetrics();

    /** Returns the configuration of this session. */
    Config getConfig();

    /** Returns the scope context of this session. */
    DynamicScopeContext getContext();

    /**
     * Returns the error of this session.
     * NOTE(review): presumably {@code null} unless the session failed - confirm.
     */
    Throwable getError();

    // ---- Control ----

    /**
     * Requests cancellation of this session.
     * NOTE(review): the meaning of the boolean result is not visible in this
     * listing - confirm against the implementation.
     */
    boolean cancel();

    // ---- Blocking wait ----

    /**
     * Blocks the calling thread until the session completes.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    void awaitCompletion() throws InterruptedException;

    /**
     * Blocks the calling thread until the session completes or the timeout elapses.
     *
     * @param timeout maximum time to wait; presumably milliseconds, since the
     *                cancellation example passes 5000 for "5 seconds" - confirm
     * @return {@code false} if the session is still running when the timeout
     *         elapses (as used in the cancellation example)
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    boolean awaitCompletion(long timeout) throws InterruptedException;
}
Package: org.webharvest.runtime.session
/**
 * Timing, progress and throughput figures for one session.
 * (API summary: method bodies are not part of this listing.)
 */
public class SessionMetrics {

    // ---- Timing ----

    /** Returns the start time. */
    Instant getStartTime();

    /** Returns the end time. */
    Instant getEndTime();

    /** Returns the duration. */
    Duration getDuration();

    // ---- Progress ----

    /** Returns the number of processed elements. */
    long getProcessedElements();

    /**
     * Increments the processed-element counter.
     * NOTE(review): presumably returns the updated count - confirm.
     */
    long incrementProcessedElements();

    // ---- Performance ----

    /** Returns the processing rate in elements/second. */
    double getProcessingRate();

    // ---- Lifecycle ----

    /**
     * Marks these metrics as ended.
     * NOTE(review): presumably records the end time - confirm.
     */
    void markEnded();
}
Package: org.webharvest.runtime.session
/**
 * Lifecycle states of a session.
 * (API summary: helper method bodies are not part of this listing.)
 */
public enum SessionStatus {

    /** Created but not started. */
    PENDING,

    /** Currently executing. */
    RUNNING,

    /** Finished successfully. */
    COMPLETED,

    /** Error occurred. */
    FAILED,

    /** User cancelled. */
    CANCELLED;

    // ---- Helper methods ----

    /**
     * Tells whether this is a terminal state; the usage examples poll until it
     * returns {@code true}.
     * NOTE(review): presumably COMPLETED, FAILED and CANCELLED - confirm.
     */
    boolean isTerminal();

    /** NOTE(review): presumably {@code true} only for COMPLETED - confirm. */
    boolean isSuccessful();

    /** NOTE(review): which states count as active is not shown here - confirm. */
    boolean isActive();
}
Package: org.webharvest.runtime.session
/**
 * Registry of {@link ScraperSession} instances, searchable by session id or
 * by client id.
 */
public interface SessionRegistry {

    // ---- Registration ----

    /**
     * Adds a session to the registry.
     *
     * @param session the session to register
     */
    void register(ScraperSession session);

    /**
     * Removes the session with the given id.
     *
     * @param sessionId id of the session to remove
     * @return NOTE(review): presumably {@code true} if a session was removed - confirm
     */
    boolean unregister(String sessionId);

    /** Removes all sessions from the registry. */
    void clear();

    // ---- Lookup ----

    /**
     * Looks up a session by its id.
     *
     * @param sessionId id of the session to find
     * @return the session wrapped in an {@code Optional}
     */
    Optional<ScraperSession> getSession(String sessionId);

    /**
     * Returns the sessions belonging to the given client.
     *
     * @param clientId id of the client whose sessions are requested
     * @return the client's sessions
     */
    List<ScraperSession> getSessionsByClientId(String clientId);

    /** Returns every session in the registry. */
    List<ScraperSession> getAllSessions();

    // ---- Utility ----

    /** Returns the number of sessions in the registry. */
    int size();

    /** Returns whether the registry contains no sessions. */
    boolean isEmpty();
}
Package: org.webharvest.runtime.client
// Assemble a client context with the builder: one client id plus
// arbitrary key/value metadata entries
ClientContext context = ClientContext.builder()
        .clientId("client-123")
        .metadata("project", "ecommerce")
        .metadata("tier", "premium")
        .metadata("userId", "user-456")
        .build();

// Immutable after creation; values are read back through getters
String clientId = context.getClientId();
Object tier = context.getMetadata("tier");
Package: org.webharvest.runtime.service
/**
 * Entry point for running configurations asynchronously and for looking up
 * the resulting sessions.
 */
public interface WebHarvestService {

    // ---- Async execution ----

    /**
     * Starts executing a configuration asynchronously.
     *
     * @param config        the configuration to execute
     * @param clientContext the client on whose behalf the session runs
     * @param options       session options
     * @return a future yielding the {@link ScraperSession} handle; the usage
     *         examples obtain the handle while the session is still
     *         PENDING or RUNNING
     */
    Future<ScraperSession> executeAsync(
            Config config,
            ClientContext clientContext,
            SessionOptions options);

    // ---- Session lookup ----

    /**
     * Looks up a session by its id.
     *
     * @param sessionId id of the session to find
     * @return the session wrapped in an {@code Optional}
     */
    Optional<ScraperSession> getSession(String sessionId);

    /**
     * Returns the sessions of the given client.
     *
     * @param clientId id of the client whose sessions are requested
     * @return the client's sessions
     */
    List<ScraperSession> getSessionsByClient(String clientId);
}
Common patterns and best practices
// Load configuration
Config config = Config.fromFile("scraper.xml");

// Create client context
ClientContext client = ClientContext.of("client-123");

// Execute async (the future is typed so that get() yields a ScraperSession)
WebHarvestService service = new WebHarvestServiceImpl();
Future<ScraperSession> future = service.executeAsync(
        config, client, SessionOptions.DEFAULT);

// Get session immediately: wait at most 100 ms for the session handle
ScraperSession session = future.get(100, TimeUnit.MILLISECONDS);
System.out.println("Session ID: " + session.getSessionId());
System.out.println("Status: " + session.getStatus()); // PENDING or RUNNING

// Wait for completion (blocking)
session.awaitCompletion();
System.out.println("Final status: " + session.getStatus()); // COMPLETED/FAILED
// Rich client context
ClientContext client = ClientContext.builder()
        .clientId("datacorp-prod")
        .metadata("project", "product-catalog")
        .metadata("tier", "enterprise")
        .metadata("userId", "alice@datacorp.com")
        .metadata("environment", "production")
        .build();

// Session options
SessionOptions options = SessionOptions.builder()
        .enableTokenTracking(true)
        .maxDuration(Duration.ofMinutes(30))
        .build();

// Execute (the future is typed so that get() yields a ScraperSession)
Future<ScraperSession> future = service.executeAsync(config, client, options);
ScraperSession session = future.get();

// Monitor progress: poll once per second until a terminal status is reached
while (!session.getStatus().isTerminal()) {
    SessionMetrics metrics = session.getMetrics();
    System.out.printf("Processed: %d elements (%.2f/sec)%n",
            metrics.getProcessedElements(),
            metrics.getProcessingRate());
    Thread.sleep(1000);
}

// Get final results (only read the context after a successful run)
if (session.getStatus() == SessionStatus.COMPLETED) {
    DynamicScopeContext context = session.getContext();
    Variable result = context.getVar("result");
    System.out.println("Result: " + result);
}
// Inject registry
@Inject
private SessionRegistry registry;

// Execute multiple sessions for different clients
List<Future<ScraperSession>> futures = new ArrayList<>();
for (String clientId : clients) {
    ClientContext context = ClientContext.of(clientId);
    // SessionOptions.DEFAULT is passed explicitly: the WebHarvestService
    // listing only declares the three-argument executeAsync
    Future<ScraperSession> future =
            service.executeAsync(config, context, SessionOptions.DEFAULT);
    futures.add(future);
}

// Wait for all to complete
for (Future<ScraperSession> future : futures) {
    // future.get() only hands back the session handle (it may still be
    // PENDING or RUNNING); awaitCompletion() is what blocks until done
    future.get().awaitCompletion();
}

// Query sessions by client
List<ScraperSession> clientSessions =
        registry.getSessionsByClientId("client-123");
System.out.printf("Client 'client-123' ran %d sessions%n",
        clientSessions.size());

// Calculate total usage per client; sessions without a token tracker are skipped
// NOTE(review): getTokenTracker() is not part of the ScraperSession listing
// shown in this document - confirm it against the JavaDoc
long totalRequests = clientSessions.stream()
        .map(ScraperSession::getTokenTracker)
        .filter(Objects::nonNull)
        .mapToLong(tracker -> tracker.getUsage().getHttpRequests())
        .sum();
System.out.printf("Total HTTP requests: %d%n", totalRequests);
// Create event listener
/**
 * Example listener: each {@code @Subscribe} method receives one kind of
 * session lifecycle event and prints a summary of it.
 */
public class SessionMonitor {

    /** Prints the session and client ids of a session that has started. */
    @Subscribe
    public void onSessionStarted(SessionStartedEvent event) {
        System.out.printf(
                "Session %s started for client %s%n",
                event.getSessionId(),
                event.getClientId());
        // Send notification, update UI, start timer, etc.
    }

    /** Prints duration and throughput of a session that has completed. */
    @Subscribe
    public void onSessionCompleted(SessionCompletedEvent event) {
        final SessionMetrics metrics = event.getMetrics();

        System.out.printf(
                "Session %s completed in %s%n",
                event.getSessionId(),
                metrics.getDuration());

        System.out.printf(
                "Processed %d elements at %.2f/sec%n",
                metrics.getProcessedElements(),
                metrics.getProcessingRate());
        // Log to database, send email, trigger webhook, etc.
    }

    /** Prints the error message of a session that has failed (to stderr). */
    @Subscribe
    public void onSessionFailed(SessionFailedEvent event) {
        System.err.printf(
                "Session %s failed: %s%n",
                event.getSessionId(),
                event.getError().getMessage());
        // Alert ops team, retry logic, rollback, etc.
    }
}
// Register listener: obtain the EventBus from the injector, then
// subscribe a SessionMonitor instance to it
EventBus eventBus = InjectorHelper.getInjector().getInstance(EventBus.class);
SessionMonitor monitor = new SessionMonitor();
eventBus.register(monitor);
// Start long-running session
// (SessionOptions.DEFAULT is passed explicitly: the WebHarvestService
// listing only declares the three-argument executeAsync)
Future<ScraperSession> future =
        service.executeAsync(config, client, SessionOptions.DEFAULT);
ScraperSession session = future.get();

// User clicks "Cancel" button
session.cancel();

// Check status: give the session up to one second to stop, returning as
// soon as it has finished instead of always sleeping the full interval
session.awaitCompletion(1000);
System.out.println(session.getStatus()); // CANCELLED

// Or with timeout
if (!session.awaitCompletion(5000)) {
    // Still running after 5 seconds
    session.cancel();
    System.out.println("Session cancelled due to timeout");
}
// IDE ExecutionManager
/**
 * Example integration: runs a configuration on behalf of a web-IDE tab and
 * streams the session metrics to that tab over a WebSocket while it runs.
 */
public class ExecutionManager {
    private final WebHarvestService service;
    private final SessionRegistry registry;
    private final WebSocketHandler websocket;

    /**
     * Creates a manager from its collaborators (the final fields above must
     * be assigned for the class to compile).
     *
     * @param service   service used to start sessions
     * @param registry  session registry
     * @param websocket channel used to push metrics to IDE tabs
     */
    public ExecutionManager(WebHarvestService service,
                            SessionRegistry registry,
                            WebSocketHandler websocket) {
        this.service = service;
        this.registry = registry;
        this.websocket = websocket;
    }

    /**
     * Starts executing the given XML configuration and begins streaming its
     * metrics to the given tab.
     *
     * @param xmlConfig configuration source
     * @param tabId     id of the IDE tab that requested the run
     * @return the id of the started session
     * @throws IllegalStateException if the session handle cannot be obtained
     */
    public String executeConfiguration(String xmlConfig, String tabId) {
        // Parse config
        Config config = Config.fromString(xmlConfig);

        // Create client context (IDE tab)
        ClientContext client = ClientContext.builder()
                .clientId("ide-user")
                .metadata("tabId", tabId)
                .metadata("source", "web-ide")
                .build();

        // Execute async; Future.get() throws checked exceptions that must be
        // handled here because this method does not declare them
        final ScraperSession session;
        try {
            Future<ScraperSession> future =
                    service.executeAsync(config, client, SessionOptions.DEFAULT);
            session = future.get();
        } catch (InterruptedException e) {
            // Restore the interrupt flag for code further up the stack
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while starting session", e);
        } catch (java.util.concurrent.ExecutionException e) {
            throw new IllegalStateException("Failed to start session", e);
        }

        // Stream updates via WebSocket until the session reaches a terminal status
        new Thread(() -> {
            try {
                while (!session.getStatus().isTerminal()) {
                    SessionMetrics metrics = session.getMetrics();
                    websocket.send(tabId, Json.toJson(metrics));
                    Thread.sleep(500);
                }
            } catch (InterruptedException e) {
                // Runnable cannot propagate the checked exception: stop
                // streaming and keep the interrupt flag set
                Thread.currentThread().interrupt();
            }
        }).start();

        return session.getSessionId();
    }
}
Full JavaDoc for all session management classes
For complete API documentation including all methods, parameters, and return types, refer to the JavaDoc included in the distribution package:
- org.webharvest.runtime.session - Session Management API
- org.webharvest.runtime.tracking - Token Tracking API
- org.webharvest.runtime.client - Client Context API
- org.webharvest.runtime.service - WebHarvest Service API

JavaDoc is available in the docs/api/ directory of the distribution ZIP.