LeapOCR Docs

Common Use Cases

Real-world examples and patterns for document processing

Common Use Cases

Discover how to use LeapOCR for common document processing scenarios with production-ready examples.

Invoice Processing

Extract structured data from invoices for accounting automation.

Schema Design

{
  "type": "object",
  "properties": {
    "invoice_number": { "type": "string" },
    "invoice_date": {
      "type": "string",
      "description": "Date in YYYY-MM-DD format"
    },
    "due_date": {
      "type": "string",
      "description": "Payment due date in YYYY-MM-DD format"
    },
    "vendor": {
      "type": "object",
      "properties": {
        "name": { "type": "string" },
        "address": { "type": "string" },
        "tax_id": { "type": "string" }
      }
    },
    "line_items": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "description": { "type": "string" },
          "quantity": { "type": "number" },
          "unit_price": { "type": "number" },
          "total": { "type": "number" }
        }
      }
    },
    "subtotal": { "type": "number" },
    "tax": { "type": "number" },
    "total": { "type": "number" }
  },
  "required": ["invoice_number", "invoice_date", "total"]
}

Implementation

import { LeapOCR } from "leapocr";

// The API key is read from the LEAPOCR_API_KEY environment variable.
const apiKey = process.env.LEAPOCR_API_KEY;
const client = new LeapOCR({ apiKey });

/**
 * Submits an invoice for structured extraction, waits for the job to finish,
 * and returns the extracted fields.
 *
 * Logs a warning (without throwing) when the amounts cannot be cross-checked
 * or when `subtotal + tax` differs from the extracted `total` by more than 0.01.
 *
 * @param invoiceUrl - URL of the invoice document to process.
 * @returns The `data` payload of the completed job.
 */
async function processInvoice(invoiceUrl: string) {
  // processURL submits the job; the extracted data is fetched below.
  const job = await client.ocr.processURL(invoiceUrl, {
    format: "structured",
    model: "english-pro-v1", // Higher accuracy for financial docs
    schema: {
      invoice_number: "string",
      invoice_date: "string",
      due_date: "string",
      vendor: {
        name: "string",
        address: "string",
        tax_id: "string"
      },
      line_items: [{
        description: "string",
        quantity: "number",
        unit_price: "number",
        total: "number"
      }],
      subtotal: "number",
      tax: "number",
      total: "number"
    }
  });

  const invoice = await client.ocr.waitUntilDone(job.jobId);

  // Validate totals.
  // A missing amount would turn the sum into NaN, and `NaN > 0.01` is false,
  // so the mismatch check alone would silently pass. Flag that case explicitly.
  const { subtotal, tax, total } = invoice.data;
  if (
    typeof subtotal !== "number" ||
    typeof tax !== "number" ||
    typeof total !== "number"
  ) {
    console.warn("Totals could not be verified - manual review required");
  } else if (Math.abs(subtotal + tax - total) > 0.01) {
    // 0.01 tolerance absorbs floating-point rounding in the sum.
    console.warn("Total mismatch - manual review required");
  }

  return invoice.data;
}
import os

from leapocr import LeapOCR, ProcessOptions, Format, Model

async def process_invoice(invoice_url: str):
    """Run structured extraction on an invoice URL and return the extracted data.

    Opens a LeapOCR client using the LEAPOCR_API_KEY environment variable,
    submits the document with ``process_and_wait``, and returns ``result.data``.
    No validation of the extracted values is performed here.
    """
    # Fields requested from the invoice.
    invoice_schema = {
        "invoice_number": "string",
        "invoice_date": "string",
        "total": "number",
        "line_items": [{
            "description": "string",
            "total": "number"
        }]
    }

    api_key = os.getenv("LEAPOCR_API_KEY")
    async with LeapOCR(api_key) as client:
        extraction_options = ProcessOptions(
            format=Format.STRUCTURED,
            model=Model.ENGLISH_PRO_V1,
            schema=invoice_schema,
        )
        result = await client.ocr.process_and_wait(
            invoice_url,
            options=extraction_options,
        )

        # Return the extracted payload as-is; add checks before trusting it.
        return result.data
// processInvoice submits the document at url for structured extraction, waits
// for the job to complete, and returns the parsed invoice.
//
// It returns a nil invoice and the underlying error if either submitting the
// job or waiting for it fails.
func processInvoice(ctx context.Context, client *ocr.SDK, url string) (*InvoiceData, error) {
    // Fields requested from the invoice.
    schema := map[string]interface{}{
        "invoice_number": "string",
        "invoice_date": "string",
        "total": "number",
    }

    job, err := client.ProcessURL(ctx, url,
        ocr.WithFormat(ocr.FormatStructured),
        ocr.WithModel(ocr.ModelEnglishProV1),
        ocr.WithSchema(schema),
    )
    if err != nil {
        return nil, err
    }

    result, err := client.WaitUntilDone(ctx, job.ID)
    if err != nil {
        // result may be nil when waiting fails; check err before touching it.
        return nil, err
    }
    return parseInvoice(result.Data), nil
}

Receipt Scanning

Extract merchant and purchase information from receipts.

Simple Approach

// Submit the receipt for structured extraction, guided by free-text
// instructions rather than an explicit schema.
// NOTE(review): the invoice example reads extracted data via
// client.ocr.waitUntilDone(result.jobId) — presumably the same step is needed
// here before using the output; confirm.
const result = await client.ocr.processURL(receiptUrl, {
  format: "structured",
  model: "standard-v1", // Cost-effective for simple receipts
  instructions:
    "Extract merchant name, date, total amount, and all purchased items",
});

Advanced Schema

{
  "type": "object",
  "properties": {
    "merchant_name": { "type": "string" },
    "date": { "type": "string" },
    "time": { "type": "string" },
    "items": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": { "type": "string" },
          "price": { "type": "number" }
        }
      }
    },
    "subtotal": { "type": "number" },
    "tax": { "type": "number" },
    "total": { "type": "number" },
    "payment_method": { "type": "string" }
  }
}

Form Extraction

Process standardized forms like applications, surveys, or government documents.

Multi-Page Form

For forms that span multiple pages with different sections:

// Submit the form for per-page structured extraction.
const job = await client.ocr.processURL(formUrl, {
  format: "per_page_structured",
  model: "pro-v1",
  schema: {
    // Schema applies to each page
    section_title: "string",
    fields: [
      {
        label: "string",
        value: "string",
      },
    ],
  },
});

// processURL returns a job handle (see the invoice example); wait for the job
// to finish before reading the per-page output.
const result = await client.ocr.waitUntilDone(job.jobId);

// Process each page
result.pages.forEach((page, index) => {
  console.log(`Page ${index + 1} - ${page.data.section_title}`);
  // A page with no extracted fields should not abort the whole loop.
  (page.data.fields ?? []).forEach((field) => {
    console.log(`${field.label}: ${field.value}`);
  });
});

Medical Records

Extract patient information and clinical data from medical documents.

HIPAA Compliance Note

Ensure your implementation follows HIPAA compliance requirements when processing protected health information (PHI).

Example Schema

{
  "type": "object",
  "properties": {
    "patient": {
      "type": "object",
      "properties": {
        "name": { "type": "string" },
        "date_of_birth": { "type": "string" },
        "mrn": { "type": "string", "description": "Medical Record Number" },
        "address": { "type": "string" }
      }
    },
    "visit_date": { "type": "string" },
    "provider": {
      "type": "object",
      "properties": {
        "name": { "type": "string" },
        "specialty": { "type": "string" }
      }
    },
    "diagnosis": {
      "type": "array",
      "items": { "type": "string" }
    },
    "medications": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": { "type": "string" },
          "dosage": { "type": "string" },
          "frequency": { "type": "string" }
        }
      }
    },
    "vitals": {
      "type": "object",
      "properties": {
        "blood_pressure": { "type": "string" },
        "heart_rate": { "type": "number" },
        "temperature": { "type": "number" }
      }
    }
  }
}

Contract Analysis

Extract key terms and parties from legal contracts.

// Submit the contract for structured extraction of its key terms and parties.
// NOTE(review): the invoice example reads extracted data via
// client.ocr.waitUntilDone(result.jobId) — presumably the same step is needed
// here before using the output; confirm.
const result = await client.ocr.processURL(contractUrl, {
  format: "structured",
  model: "pro-v1", // Highest accuracy for legal docs
  schema: {
    contract_type: "string",
    effective_date: "string",
    expiration_date: "string",
    // One entry per party named in the contract.
    parties: [
      {
        name: "string",
        role: "string",
        address: "string",
      },
    ],
    payment_terms: "string",
    termination_clause: "string",
    governing_law: "string",
  },
});

Bank Statements

Process bank statements for financial reconciliation.

// Submit the statement for structured extraction.
const job = await client.ocr.processURL(statementUrl, {
  format: "structured",
  model: "english-pro-v1",
  schema: {
    account_number: "string",
    statement_period: {
      start_date: "string",
      end_date: "string",
    },
    opening_balance: "number",
    closing_balance: "number",
    transactions: [
      {
        date: "string",
        description: "string",
        amount: "number",
        type: "string", // "debit" or "credit"
      },
    ],
  },
});

// processURL returns a job handle (see the invoice example); wait for the job
// to finish before reading the extracted data.
const result = await client.ocr.waitUntilDone(job.jobId);

// Validate balance
// Assumes each amount is a positive magnitude whose direction is given by
// `type` — confirm against real extractions.
const transactions = result.data.transactions ?? [];

const totalDebits = transactions
  .filter((t) => t.type === "debit")
  .reduce((sum, t) => sum + t.amount, 0);

const totalCredits = transactions
  .filter((t) => t.type === "credit")
  .reduce((sum, t) => sum + t.amount, 0);

const calculatedBalance =
  result.data.opening_balance - totalDebits + totalCredits;

// Compare against the extracted closing balance; 0.01 tolerance absorbs
// floating-point rounding in the sums.
if (Math.abs(calculatedBalance - result.data.closing_balance) > 0.01) {
  console.warn("Balance mismatch - manual review required");
}

ID Document Verification

Extract information from driver's licenses, passports, and ID cards.

// Submit the ID document for structured extraction.
const job = await client.ocr.processURL(idDocumentUrl, {
  format: "structured",
  model: "pro-v1",
  schema: {
    document_type: "string",
    document_number: "string",
    full_name: "string",
    date_of_birth: "string",
    issue_date: "string",
    expiration_date: "string",
    address: "string",
    photo: "boolean", // Whether photo is present
  },
});

// processURL returns a job handle (see the invoice example); wait for the job
// to finish before reading the extracted data.
const result = await client.ocr.waitUntilDone(job.jobId);

// Verify document hasn't expired
const expirationDate = new Date(result.data.expiration_date);
if (Number.isNaN(expirationDate.getTime())) {
  // An unparseable or missing date yields an Invalid Date, which compares as
  // "not expired" — flag it instead of letting it pass silently.
  console.warn("Expiration date missing or unreadable - manual review required");
} else if (expirationDate < new Date()) {
  console.warn("Document has expired");
}

Best Practices

  1. Choose the Right Model

    • Use standard-v1 for simple documents and prototyping
    • Use english-pro-v1 for English-only documents requiring high accuracy
    • Use pro-v1 for critical applications and complex documents
  2. Design Focused Schemas

    • Extract only the fields you need
    • Use clear, descriptive field names
    • Add descriptions for ambiguous fields
  3. Validate Results

    • Check calculated vs. extracted totals
    • Verify required fields are present
    • Flag results for manual review when confidence is low
  4. Handle Errors Gracefully

    • Implement retry logic for transient failures
    • Log failures for debugging
    • Have fallback workflows for critical processes
  5. Monitor Performance

    • Track processing times
    • Monitor credit usage
    • Set up alerts for failures