LeapOCR Documentation

Transform documents into structured data using AI-powered OCR. Complete API reference, SDK guides, and examples for JavaScript, Python, PHP, and Go.

LeapOCR provides enterprise-grade document processing with AI-powered data extraction. Transform PDFs and images into structured data, markdown, or custom formats using our powerful API and SDKs.

Get Started in 5 Minutes

Getting Started

Complete walkthrough to process your first document

Choose Your SDK

Pick from JavaScript, Python, PHP, Go, or the REST API

SDK API Reference

Interactive reference for the public endpoints, generated from the API spec

Use Cases

Real-world examples and patterns

Key Features

AI-Powered Extraction - Advanced OCR with multiple model options
Multiple Output Formats - Page-level structured JSON or markdown output
Custom Schemas - Define your own extraction schemas with JSON Schema
Custom Instructions - Define your own extraction instructions to guide the model
Custom Templates - Define your own document templates with predefined schemas, instructions, and settings

Quick Start

Choose your language to get started:

import { LeapOCR } from "leapocr";
import { z } from "zod";

const InvoiceSchema = z.object({
  invoice_number: z.string(),
  invoice_date: z.string(),
  total: z.number(),
});

const client = new LeapOCR({ apiKey: process.env.LEAPOCR_API_KEY });

const job = await client.ocr.processURL("https://example.com/invoice.pdf", {
  format: "structured",
  schema: z.toJSONSchema(InvoiceSchema),
  instructions: "Extract invoice number, date, and total",
});

const result = await client.ocr.waitUntilDone(job.jobId);

View full JavaScript guide →

from leapocr import LeapOCR, ProcessOptions, Format

async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
    result = await client.ocr.process_and_wait(
        "https://example.com/invoice.pdf",
        options=ProcessOptions(
            format=Format.STRUCTURED,
            schema={
                "type": "object",
                "properties": {
                    "invoice_number": {"type": "string"},
                    "invoice_date": {"type": "string"},
                    "total": {"type": "number"},
                },
                "required": ["invoice_number", "invoice_date", "total"],
            },
            instructions="Extract invoice number, date, and total"
        )
    )

View full Python guide →

<?php

use LeapOCR\Enums\Format;
use LeapOCR\LeapOCR;
use LeapOCR\Models\ProcessOptions;

$client = new LeapOCR((string) getenv('LEAPOCR_API_KEY'));

$job = $client->ocr()->processUrl(
    'https://example.com/invoice.pdf',
    new ProcessOptions(
        format: Format::STRUCTURED,
        schema: [
            'type' => 'object',
            'properties' => [
                'invoice_number' => ['type' => 'string'],
                'invoice_date' => ['type' => 'string'],
                'total' => ['type' => 'number'],
            ],
            'required' => ['invoice_number', 'invoice_date', 'total'],
        ],
        instructions: 'Extract invoice number, date, and total',
    ),
);

$result = $client->ocr()->waitUntilDone($job->jobId);

View full PHP guide →

client, _ := ocr.New(os.Getenv("LEAPOCR_API_KEY"))

schema := map[string]interface{}{
    "type": "object",
    "properties": map[string]interface{}{
        "invoice_number": map[string]string{"type": "string"},
        "invoice_date":   map[string]string{"type": "string"},
        "total":          map[string]string{"type": "number"},
    },
    "required": []string{"invoice_number", "invoice_date", "total"},
}

job, _ := client.ProcessURL(ctx, "https://example.com/invoice.pdf",
    ocr.WithFormat(ocr.FormatStructured),
    ocr.WithSchema(schema),
    ocr.WithInstructions("Extract invoice number, date, and total"),
)

result, _ := client.WaitUntilDone(ctx, job.ID)

View full Go guide →

# Submit document from URL
curl -X POST https://api.leapocr.com/ocr/uploads/url \
  -H "X-API-Key: your-api-key" \
  -H "Content-Type: application/json" \
  -d '{
    "url": "https://example.com/invoice.pdf",
    "format": "structured",
    "schema": {
      "type": "object",
      "properties": {
        "invoice_number": { "type": "string" },
        "invoice_date": { "type": "string" },
        "total": { "type": "number" }
      },
      "required": ["invoice_number", "invoice_date", "total"]
    },
    "instructions": "Extract invoice number, date, and total"
  }'

# Get result
curl https://api.leapocr.com/ocr/result/{job_id} \
  -H "X-API-Key: your-api-key"