LeapOCR Docs

Python SDK

Official Python SDK for LeapOCR - Async-first client with full type hints

Python SDK

Official Python SDK for LeapOCR - Transform documents into structured data using AI-powered OCR.

PyPI version · Python Support · License: MIT

Installation

pip install leapocr

Or using uv:

uv add leapocr

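Prerequisites

  • A LeapOCR API key, exported as the LEAPOCR_API_KEY environment variable (the examples below read it with os.getenv)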

Quick Start

import asyncio
import os
from leapocr import LeapOCR, ProcessOptions, Format

async def main():
    """Run one OCR job end to end: submit, wait, print the outcome, delete the job."""
    api_key = os.getenv("LEAPOCR_API_KEY")

    # The async context manager takes care of cleaning up the client on exit.
    async with LeapOCR(api_key) as client:
        extraction_options = ProcessOptions(
            format=Format.STRUCTURED,
            instructions="Extract invoice number, date, and total amount"
        )

        # Submit the document and block until processing has finished.
        outcome = await client.ocr.process_and_wait(
            "https://example.com/document.pdf",
            options=extraction_options,
        )

        print(f"Credits used: {outcome.credits_used}")
        print(f"Extracted data: {outcome.data}")

        # Remove the finished job once its result has been read.
        await client.ocr.delete_job(outcome.job_id)

asyncio.run(main())

New to LeapOCR? Check out the Getting Started guide for a complete walkthrough.

Key Features

  • Async-First Design - Built on asyncio with httpx for high-performance concurrent processing
  • Type-Safe API - Full type hints and mypy strict mode support
  • Context Manager Support - Proper resource cleanup with async context managers
  • Built-in Retry Logic - Automatic exponential backoff for transient failures
  • Progress Tracking - Real-time callbacks for long-running operations

For models, formats, and schemas, see Core Concepts.

Usage Examples

Processing from URL

import asyncio
import os  # needed for os.getenv below; the snippet previously omitted it
from leapocr import LeapOCR, ProcessOptions, Format, Model

async def process_url():
    """Process a document hosted at a URL and print timing, cost, and extracted data."""
    # The API key is read from the LEAPOCR_API_KEY environment variable.
    async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
        # Submit the URL and wait for the job to finish in a single call.
        result = await client.ocr.process_and_wait(
            "https://example.com/invoice.pdf",
            options=ProcessOptions(
                format=Format.STRUCTURED,
                model=Model.STANDARD_V1,
                instructions="Extract invoice number, date, and total amount",
            ),
        )

        print(f"Processing time: {result.processing_time_seconds:.2f}s")
        print(f"Credits used: {result.credits_used}")
        print(f"Data: {result.data}")

asyncio.run(process_url())

Processing Local Files

import asyncio
import os
from pathlib import Path

from leapocr import LeapOCR, ProcessOptions, Format

async def process_file():
    """Upload a local PDF, wait for processing, and print the extracted fields."""
    # The API key is read from the LEAPOCR_API_KEY environment variable.
    async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
        file_path = Path("invoice.pdf")

        # The schema maps each field to extract to the type expected for it.
        result = await client.ocr.process_file_and_wait(
            file_path,
            options=ProcessOptions(
                format=Format.STRUCTURED,
                schema={
                    "invoice_number": "string",
                    "total_amount": "number",
                    "invoice_date": "string",
                    "vendor_name": "string",
                },
            ),
        )

        print(f"Data: {result.data}")

asyncio.run(process_file())

Custom Schema Extraction

See the Custom Schemas guide for detailed schema design patterns.

# Shape of a single entry in the "medications" array.
_medication_item = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "dosage": {"type": "string"},
    },
}

# Top-level object: two string fields plus the array of medication entries.
schema = {
    "type": "object",
    "properties": {
        "patient_name": {"type": "string"},
        "date_of_birth": {"type": "string"},
        "medications": {"type": "array", "items": _medication_item},
    },
}

async def extract_medical_data():
    """Extract the fields described by the module-level ``schema`` and print two of them."""
    api_key = os.getenv("LEAPOCR_API_KEY")

    async with LeapOCR(api_key) as client:
        schema_options = ProcessOptions(
            format=Format.STRUCTURED,
            schema=schema,
        )
        record = await client.ocr.process_and_wait(
            "https://example.com/medical-record.pdf",
            options=schema_options,
        )

        # The result data is indexed by the property names declared in the schema.
        print(f"Patient: {record.data['patient_name']}")
        print(f"Medications: {record.data['medications']}")

asyncio.run(extract_medical_data())

Monitoring Job Progress

async def monitor_progress(poll_interval: float = 2.0, max_wait: float = 600.0):
    """Submit a job and poll its status by hand until it completes.

    Args:
        poll_interval: Seconds to sleep between status checks.
        max_wait: Upper bound, in seconds, on the total time spent polling.

    Raises:
        TimeoutError: If the job has not reported "completed" within ``max_wait``
            seconds. Without this bound, a job that never completes would keep
            the loop running forever.
    """
    async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
        # Submit job
        job = await client.ocr.process_url(
            "https://example.com/document.pdf",
            options=ProcessOptions(format=Format.STRUCTURED),
        )

        # Use the event loop's monotonic clock so wall-clock changes cannot
        # shorten or extend the deadline.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + max_wait

        # Poll for status
        while True:
            status = await client.ocr.get_job_status(job.job_id)
            print(f"Status: {status.status} ({status.progress * 100:.1f}% complete)")

            if status.status == "completed":
                result = await client.ocr.get_job_result(job.job_id)
                print("Processing complete!", result)
                break

            # NOTE(review): a failed job presumably reports some other terminal
            # status; confirm its name in the API reference and handle it here.
            if loop.time() >= deadline:
                raise TimeoutError(
                    f"Job {job.job_id} did not complete within {max_wait:.0f}s"
                )

            await asyncio.sleep(poll_interval)

asyncio.run(monitor_progress())

Using Progress Callbacks

async def with_progress_callback():
    """Process a document while printing progress through a callback."""

    def report(status):
        # Called with each status update; progress is a fraction, shown as a percentage.
        percent = status.progress * 100
        print(f"Progress: {percent:.1f}%")

    api_key = os.getenv("LEAPOCR_API_KEY")
    async with LeapOCR(api_key) as client:
        structured = ProcessOptions(format=Format.STRUCTURED)
        outcome = await client.ocr.process_and_wait(
            "https://example.com/document.pdf",
            options=structured,
            poll_interval=1.0,  # Check every second
            progress_callback=report,
        )

        print("Complete!", outcome)

asyncio.run(with_progress_callback())

Configuration

Custom Configuration

import asyncio

from leapocr import LeapOCR, LeapOCRConfig
from httpx import Timeout

# Explicit client settings; replace the placeholder API key with a real one.
config = LeapOCRConfig(
    api_key="your-api-key",
    base_url="https://api.leapocr.com",
    timeout=Timeout(60.0),
    max_retries=3,
)

async def main():
    """Open a client built from ``config``."""
    # `async with` is only valid inside a coroutine, so the client is opened
    # within an async function that asyncio.run() drives.
    async with LeapOCR.from_config(config) as client:
        # Use client
        pass

asyncio.run(main())

Environment Variables

export LEAPOCR_API_KEY="your-api-key"

Error Handling

The SDK provides typed exceptions for robust error handling:

import asyncio
import os

from leapocr import (
    LeapOCR,
    LeapOCRError,
    AuthenticationError,
    ValidationError,
    ProcessingError,
    NetworkError,
)

async def handle_errors():
    """Process a document and report each category of SDK error separately."""
    try:
        async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
            result = await client.ocr.process_and_wait(
                "https://example.com/doc.pdf"
            )
    except AuthenticationError:
        print("Authentication failed - check your API key")
    except ValidationError as e:
        print(f"Validation error: {e.message}")
    except NetworkError as e:
        print(f"Network error: {e.message}")
        if e.is_retryable:
            # Retry the operation
            pass
    except ProcessingError as e:
        print(f"Processing failed: {e.message}")
    # LeapOCRError is the base exception for all SDK errors, so it comes last
    # to let the more specific handlers above match first.
    except LeapOCRError as e:
        print(f"General error: {e.message}")

asyncio.run(handle_errors())

Exception Types

  • AuthenticationError - Authentication failures
  • ValidationError - Input validation errors
  • NetworkError - Network/connectivity issues (retryable)
  • ProcessingError - Document processing errors
  • TimeoutError - Operation timeouts
  • LeapOCRError - Base exception for all SDK errors

See the Troubleshooting Guide for common issues and solutions.

API Reference

Client Initialization

# From API key
async with LeapOCR(api_key: str) as client:
    pass

# From config
async with LeapOCR.from_config(config: LeapOCRConfig) as client:
    pass

Processing Methods

# Process from URL
await client.ocr.process_url(
    url: str,
    options: Optional[ProcessOptions] = None
) -> Job

# Process and wait for completion
await client.ocr.process_and_wait(
    url: str,
    options: Optional[ProcessOptions] = None,
    poll_interval: float = 2.0,
    progress_callback: Optional[Callable] = None
) -> OCRResult

# Process file
await client.ocr.process_file(
    file: Path | BinaryIO,
    options: Optional[ProcessOptions] = None
) -> Job

# Process file and wait
await client.ocr.process_file_and_wait(
    file: Path | BinaryIO,
    options: Optional[ProcessOptions] = None,
    poll_interval: float = 2.0,
    progress_callback: Optional[Callable] = None
) -> OCRResult

# Get job status
await client.ocr.get_job_status(job_id: str) -> JobStatus

# Get job result
await client.ocr.get_job_result(job_id: str) -> OCRResult

# Wait for completion
await client.ocr.wait_until_done(
    job_id: str,
    poll_interval: float = 2.0,
    progress_callback: Optional[Callable] = None
) -> OCRResult

Processing Options

from leapocr import ProcessOptions, Format, Model

# Reference call showing each ProcessOptions keyword used on this page; the
# "..." strings and the {"field": "type"} schema are placeholders.
options = ProcessOptions(
    format=Format.STRUCTURED,  # or Format.MARKDOWN, Format.PER_PAGE_STRUCTURED
    model=Model.STANDARD_V1,   # or custom model string
    schema={"field": "type"},  # JSON schema for extraction
    instructions="...",         # Processing instructions
    template_slug="...",        # Document template slug
)

Type Hints

The SDK provides full type hints for all public APIs:

from leapocr import (
    LeapOCR,
    LeapOCRConfig,
    ProcessOptions,
    Format,
    Model,
    Job,
    JobStatus,
    OCRResult,
    Page,
)

Next Steps

Resources