Remote URL Upload
Create a job and start processing from a remote URL
Remote URL Upload
Create a job and start processing from a remote URL. Supported format: PDF (.pdf) only.
Endpoint
POST /ocr/uploads/url
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| url | string | Yes | URL to PDF document |
| output_type | string | No | Output format: structured, markdown, per_page_structured |
| model | string | No | Model to use (default: standard-v1) |
| instruction | string | No | Processing instructions (cannot use with schema or template_slug) |
| schema | object | No | JSON schema for extraction (cannot use with instruction) |
| template_slug | string | No | Document template slug (cannot use with instruction or schema) |
Note: Only one of template_slug, schema, or instruction can be provided per request.
Output Types
- structured: Structured data extraction. Requires either template_slug OR format (with schema & instructions)
- markdown: Page-by-page OCR. All configuration fields are optional
- per_page_structured: Per-page structured extraction
SDK Examples
import { LeapOCR } from "leapocr";
const client = new LeapOCR({
apiKey: process.env.LEAPOCR_API_KEY,
});
// Submit a document from URL
const job = await client.ocr.processURL(
"https://example.com/document.pdf"
);
console.log(`Job created: ${job.jobId}`);
// Wait for processing to complete
const result = await client.ocr.waitUntilDone(job.jobId);
console.log("Processing complete!");
curl -X POST https://api.leapocr.com/api/v1/ocr/uploads/url \
-H "X-API-Key: your-api-key" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/document.pdf"
}'
import asyncio
import os
from leapocr import LeapOCR
async def main():
async with LeapOCR(os.getenv("LEAPOCR_API_KEY")) as client:
# Submit a document from URL
job = await client.ocr.process_url(
"https://example.com/document.pdf"
)
print(f"Job created: {job.job_id}")
# Wait for processing to complete
result = await client.ocr.wait_until_done(job.job_id)
print("Processing complete!")
asyncio.run(main())
package main
import (
"context"
"fmt"
"log"
"os"
"github.com/leapocr/leapocr-go"
)
func main() {
client, err := ocr.New(os.Getenv("LEAPOCR_API_KEY"))
if err != nil {
log.Fatal(err)
}
ctx := context.Background()
// Submit a document from URL
job, err := client.ProcessURL(ctx, "https://example.com/document.pdf")
if err != nil {
log.Fatal(err)
}
fmt.Printf("Job created: %s\n", job.ID)
// Wait for processing to complete
result, err := client.WaitUntilDone(ctx, job.ID)
if err != nil {
log.Fatal(err)
}
fmt.Println("Processing complete!")
}
Response
{
"job_id": "job_abc123"
}
Error Responses
| Status Code | Description |
|---|---|
| 400 | Bad Request |
| 401 | Unauthorized |
| 402 | Insufficient credits |
| 500 | Internal Server Error |
Using Templates
You can use pre-configured templates for common document types:
const job = await client.ocr.processURL(
"https://example.com/document.pdf",
{
templateSlug: "invoice-extraction",
}
);
curl -X POST https://api.leapocr.com/api/v1/ocr/uploads/url \
-H "X-API-Key: your-api-key" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/document.pdf",
"template_slug": "invoice-extraction"
}'
from leapocr import ProcessOptions
job = await client.ocr.process_url(
"https://example.com/document.pdf",
options=ProcessOptions(
template_slug="invoice-extraction"
),
)
job, err := client.ProcessURL(ctx,
"https://example.com/document.pdf",
ocr.WithTemplateSlug("invoice-extraction"),
)
Custom Schema Extraction
Define custom extraction schemas with instructions for specific use cases:
const job = await client.ocr.processURL(
"https://example.com/document.pdf",
{
format: "structured",
schema: {
type: "object",
properties: {
invoice_number: { type: "string" },
date: { type: "string" },
total_amount: { type: "number" },
},
},
instructions: "Extract invoice details from the document",
}
);
curl -X POST https://api.leapocr.com/api/v1/ocr/uploads/url \
-H "X-API-Key: your-api-key" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/document.pdf",
"format": "structured",
"schema": {
"type": "object",
"properties": {
"invoice_number": {"type": "string"},
"date": {"type": "string"},
"total_amount": {"type": "number"}
}
},
"instructions": "Extract invoice details from the document"
}'
from leapocr import ProcessOptions, Format
job = await client.ocr.process_url(
"https://example.com/document.pdf",
options=ProcessOptions(
format=Format.STRUCTURED,
schema={
"type": "object",
"properties": {
"invoice_number": {"type": "string"},
"date": {"type": "string"},
"total_amount": {"type": "number"},
},
},
instructions="Extract invoice details from the document",
),
)
schema := map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"invoice_number": map[string]string{"type": "string"},
"date": map[string]string{"type": "string"},
"total_amount": map[string]string{"type": "number"},
},
}
job, err := client.ProcessURL(ctx,
"https://example.com/document.pdf",
ocr.WithFormat(ocr.FormatStructured),
ocr.WithSchema(schema),
ocr.WithInstructions("Extract invoice details from the document"),
)
Next Steps
- Get Job Status - Check processing progress
- Get Job Result - Retrieve the extracted data
- Delete Job - Remove sensitive data