LeapOCR Docs

Remote URL Upload

Create a job and start processing from a remote URL

Remote URL Upload

Create a job and start processing from a remote URL. Supported format: PDF (.pdf) only.

Endpoint

POST /ocr/uploads/url

Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| url | string | Yes | URL to PDF document |
| output_type | string | No | Output format: structured, markdown, per_page_structured |
| model | string | No | Model to use (default: standard-v1) |
| instruction | string | No | Processing instructions (cannot use with schema or template_slug) |
| schema | object | No | JSON schema for extraction (cannot use with instruction) |
| template_slug | string | No | Document template slug (cannot use with instruction or schema) |

Note: Only one of template_slug, schema, or instruction can be provided per request.

Output Types

  • structured: Structured data extraction. Requires either template_slug OR format (with schema & instructions)
  • markdown: Page-by-page OCR. All configuration fields are optional
  • per_page_structured: Per-page structured extraction

SDK Examples

import { LeapOCR } from "leapocr";

// Build the client from the API key held in the environment.
const apiKey = process.env.LEAPOCR_API_KEY;
const client = new LeapOCR({ apiKey });

// Submit a document from URL
const documentUrl = "https://example.com/document.pdf";
const job = await client.ocr.processURL(documentUrl);

console.log(`Job created: ${job.jobId}`);

// Wait for processing to complete
const result = await client.ocr.waitUntilDone(job.jobId);
console.log("Processing complete!");
# Submit a document from URL (long-form curl options)
curl --request POST https://api.leapocr.com/api/v1/ocr/uploads/url \
  --header "X-API-Key: your-api-key" \
  --header "Content-Type: application/json" \
  --data '{
    "url": "https://example.com/document.pdf"
  }'
import asyncio
import os
from leapocr import LeapOCR

async def main():
    """Submit a remote PDF for OCR and wait for the job to complete."""
    api_key = os.getenv("LEAPOCR_API_KEY")
    document_url = "https://example.com/document.pdf"

    async with LeapOCR(api_key) as client:
        # Submit a document from URL
        job = await client.ocr.process_url(document_url)
        print(f"Job created: {job.job_id}")

        # Wait for processing to complete
        result = await client.ocr.wait_until_done(job.job_id)
        print("Processing complete!")

asyncio.run(main())
package main

import (
    "context"
    "fmt"
    "log"
    "os"
    "github.com/leapocr/leapocr-go"
)

// main submits a remote PDF for OCR using the API key from the
// LEAPOCR_API_KEY environment variable, then waits for the job to complete.
// Any error aborts the program via log.Fatal.
func main() {
    client, err := ocr.New(os.Getenv("LEAPOCR_API_KEY"))
    if err != nil {
        log.Fatal(err)
    }

    ctx := context.Background()

    // Submit a document from URL
    job, err := client.ProcessURL(ctx, "https://example.com/document.pdf")
    if err != nil {
        log.Fatal(err)
    }

    fmt.Printf("Job created: %s\n", job.ID)

    // Wait for processing to complete
    result, err := client.WaitUntilDone(ctx, job.ID)
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println("Processing complete!")
    // Go refuses to compile a declared-but-unused local, so the result
    // has to be consumed for this example to build.
    fmt.Printf("Result: %+v\n", result)
}

Response

{
  "job_id": "job_abc123"
}

Error Responses

| Status Code | Description |
| --- | --- |
| 400 | Bad Request |
| 401 | Unauthorized |
| 402 | Insufficient credits |
| 500 | Internal Server Error |

Using Templates

You can use pre-configured templates for common document types:

// Select the pre-configured template by its slug.
const templateOptions = { templateSlug: "invoice-extraction" };

const job = await client.ocr.processURL(
  "https://example.com/document.pdf",
  templateOptions
);
# Submit a document from URL using a pre-configured template
curl --request POST https://api.leapocr.com/api/v1/ocr/uploads/url \
  --header "X-API-Key: your-api-key" \
  --header "Content-Type: application/json" \
  --data '{
    "url": "https://example.com/document.pdf",
    "template_slug": "invoice-extraction"
  }'
from leapocr import ProcessOptions

job = await client.ocr.process_url(
    "https://example.com/document.pdf",
    options=ProcessOptions(
        template_slug="invoice-extraction"
    ),
)
// Select the pre-configured template by its slug.
const documentURL = "https://example.com/document.pdf"

job, err := client.ProcessURL(ctx, documentURL, ocr.WithTemplateSlug("invoice-extraction"))

Custom Schema Extraction

Define custom extraction schemas with instructions for specific use cases:

// JSON schema naming the fields to extract and their types.
const invoiceSchema = {
  type: "object",
  properties: {
    invoice_number: { type: "string" },
    date: { type: "string" },
    total_amount: { type: "number" },
  },
};

const job = await client.ocr.processURL("https://example.com/document.pdf", {
  format: "structured",
  schema: invoiceSchema,
  instructions: "Extract invoice details from the document",
});
# Submit a document from URL with a custom extraction schema
curl --request POST https://api.leapocr.com/api/v1/ocr/uploads/url \
  --header "X-API-Key: your-api-key" \
  --header "Content-Type: application/json" \
  --data '{
    "url": "https://example.com/document.pdf",
    "format": "structured",
    "schema": {
      "type": "object",
      "properties": {
        "invoice_number": {"type": "string"},
        "date": {"type": "string"},
        "total_amount": {"type": "number"}
      }
    },
    "instructions": "Extract invoice details from the document"
  }'
from leapocr import ProcessOptions, Format

job = await client.ocr.process_url(
    "https://example.com/document.pdf",
    options=ProcessOptions(
        format=Format.STRUCTURED,
        schema={
            "type": "object",
            "properties": {
                "invoice_number": {"type": "string"},
                "date": {"type": "string"},
                "total_amount": {"type": "number"},
            },
        },
        instructions="Extract invoice details from the document",
    ),
)
schema := map[string]interface{}{
    "type": "object",
    "properties": map[string]interface{}{
        "invoice_number": map[string]string{"type": "string"},
        "date": map[string]string{"type": "string"},
        "total_amount": map[string]string{"type": "number"},
    },
}

job, err := client.ProcessURL(ctx,
    "https://example.com/document.pdf",
    ocr.WithFormat(ocr.FormatStructured),
    ocr.WithSchema(schema),
    ocr.WithInstructions("Extract invoice details from the document"),
)

Next Steps