Python SDK
Official Python SDK for LeapOCR - Async-first client with full type hints
Python SDK
Official Python SDK for LeapOCR - Transform documents into structured data using AI-powered OCR.
Installation
pip install leapocr
Or using uv:
uv add leapocr
Prerequisites
- Python 3.9 or higher
- LeapOCR API key (sign up here)
Quick Start
import asyncio
import os
from leapocr import LeapOCR, ProcessOptions, Format
async def main():
# Initialize the SDK
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
# Process a document and wait for completion
result = await client.ocr.process_and_wait(
"https://example.com/document.pdf",
options=ProcessOptions(
format=Format.STRUCTURED,
instructions="Extract invoice number, date, and total amount"
),
)
print(f"Credits used: {result.credits_used}")
print(f"Extracted data: {result.data}")
# Delete the job
await client.ocr.delete_job(result.job_id)
asyncio.run(main())
New to LeapOCR? Check out the Getting Started guide for a complete walkthrough.
Key Features
- Async-First Design - Built on asyncio with httpx for high-performance concurrent processing
- Type-Safe API - Full type hints and mypy strict mode support
- Context Manager Support - Proper resource cleanup with async context managers
- Built-in Retry Logic - Automatic exponential backoff for transient failures
- Progress Tracking - Real-time callbacks for long-running operations
For models, formats, and schemas, see Core Concepts.
Usage Examples
Processing from URL
import asyncio
from leapocr import LeapOCR, ProcessOptions, Format, Model
async def process_url():
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
result = await client.ocr.process_and_wait(
"https://example.com/invoice.pdf",
options=ProcessOptions(
format=Format.STRUCTURED,
model=Model.STANDARD_V1,
instructions="Extract invoice number, date, and total amount",
),
)
print(f"Processing time: {result.processing_time_seconds:.2f}s")
print(f"Credits used: {result.credits_used}")
print(f"Data: {result.data}")
asyncio.run(process_url())
Processing Local Files
from pathlib import Path
async def process_file():
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
file_path = Path("invoice.pdf")
result = await client.ocr.process_file_and_wait(
file_path,
options=ProcessOptions(
format=Format.STRUCTURED,
schema={
"invoice_number": "string",
"total_amount": "number",
"invoice_date": "string",
"vendor_name": "string",
},
),
)
print(f"Data: {result.data}")
asyncio.run(process_file())
Custom Schema Extraction
See the Custom Schemas guide for detailed schema design patterns.
schema = {
"type": "object",
"properties": {
"patient_name": {"type": "string"},
"date_of_birth": {"type": "string"},
"medications": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"dosage": {"type": "string"},
},
},
},
},
}
async def extract_medical_data():
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
result = await client.ocr.process_and_wait(
"https://example.com/medical-record.pdf",
options=ProcessOptions(
format=Format.STRUCTURED,
schema=schema,
),
)
print(f"Patient: {result.data['patient_name']}")
print(f"Medications: {result.data['medications']}")
asyncio.run(extract_medical_data())
Monitoring Job Progress
async def monitor_progress():
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
# Submit job
job = await client.ocr.process_url(
"https://example.com/document.pdf",
options=ProcessOptions(format=Format.STRUCTURED),
)
# Poll for status
while True:
status = await client.ocr.get_job_status(job.job_id)
print(f"Status: {status.status} ({status.progress * 100:.1f}% complete)")
if status.status == "completed":
result = await client.ocr.get_job_result(job.job_id)
print("Processing complete!", result)
break
await asyncio.sleep(2)
asyncio.run(monitor_progress())
Using Progress Callbacks
async def with_progress_callback():
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
def progress_callback(status):
print(f"Progress: {status.progress * 100:.1f}%")
result = await client.ocr.process_and_wait(
"https://example.com/document.pdf",
options=ProcessOptions(format=Format.STRUCTURED),
poll_interval=1.0, # Check every second
progress_callback=progress_callback,
)
print("Complete!", result)
asyncio.run(with_progress_callback())
Configuration
Custom Configuration
from leapocr import LeapOCR, LeapOCRConfig
from httpx import Timeout
config = LeapOCRConfig(
api_key="your-api-key",
base_url="https://api.leapocr.com",
timeout=Timeout(60.0),
max_retries=3,
)
async with LeapOCR.from_config(config) as client:
# Use client
pass
Environment Variables
export LEAPOCR_API_KEY="your-api-key"
Error Handling
The SDK provides typed exceptions for robust error handling:
from leapocr import (
LeapOCRError,
AuthenticationError,
ValidationError,
ProcessingError,
NetworkError,
)
async def handle_errors():
try:
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
result = await client.ocr.process_and_wait(
"https://example.com/doc.pdf"
)
except AuthenticationError:
print("Authentication failed - check your API key")
except ValidationError as e:
print(f"Validation error: {e.message}")
except NetworkError as e:
print(f"Network error: {e.message}")
if e.is_retryable:
# Retry the operation
pass
except ProcessingError as e:
print(f"Processing failed: {e.message}")
except LeapOCRError as e:
print(f"General error: {e.message}")
asyncio.run(handle_errors())
Exception Types
- AuthenticationError - Authentication failures
- ValidationError - Input validation errors
- NetworkError - Network/connectivity issues (retryable)
- ProcessingError - Document processing errors
- TimeoutError - Operation timeouts
- LeapOCRError - Base exception for all SDK errors
See the Troubleshooting Guide for common issues and solutions.
API Reference
Client Initialization
# From API key
async with LeapOCR(api_key: str) as client:
pass
# From config
async with LeapOCR.from_config(config: LeapOCRConfig) as client:
pass
Processing Methods
# Process from URL
await client.ocr.process_url(
url: str,
options: Optional[ProcessOptions] = None
) -> Job
# Process and wait for completion
await client.ocr.process_and_wait(
url: str,
options: Optional[ProcessOptions] = None,
poll_interval: float = 2.0,
progress_callback: Optional[Callable] = None
) -> OCRResult
# Process file
await client.ocr.process_file(
file: Path | BinaryIO,
options: Optional[ProcessOptions] = None
) -> Job
# Process file and wait
await client.ocr.process_file_and_wait(
file: Path | BinaryIO,
options: Optional[ProcessOptions] = None,
poll_interval: float = 2.0,
progress_callback: Optional[Callable] = None
) -> OCRResult
# Get job status
await client.ocr.get_job_status(job_id: str) -> JobStatus
# Get job result
await client.ocr.get_job_result(job_id: str) -> OCRResult
# Wait for completion
await client.ocr.wait_until_done(
job_id: str,
poll_interval: float = 2.0,
progress_callback: Optional[Callable] = None
) -> OCRResult
Processing Options
from leapocr import ProcessOptions, Format, Model
options = ProcessOptions(
format=Format.STRUCTURED, # or Format.MARKDOWN, Format.PER_PAGE_STRUCTURED
model=Model.STANDARD_V1, # or custom model string
schema={"field": "type"}, # JSON schema for extraction
instructions="...", # Processing instructions
template_slug="...", # Document template slug
)
Type Hints
The SDK provides full type hints for all public APIs:
from leapocr import (
LeapOCR,
LeapOCRConfig,
ProcessOptions,
Format,
Model,
Job,
JobStatus,
OCRResult,
Page,
)
Next Steps
- Explore Common Use Cases for real-world examples
- Learn about Processing Models
- Review Custom Schemas for structured extraction
- Check the FAQ for common questions
Resources
- PyPI: pypi.org/project/leapocr
- GitHub: github.com/leapocr/leapocr-python
- Issues: GitHub Issues