Back to API Docs

Advanced OCR (DocTR)

Neural network-based OCR powered by DocTR. Higher accuracy than Tesseract with word-level bounding boxes and per-word confidence scores. Ideal for structured document processing where you need spatial data.

POST https://app.alternapdf.com/api/v1/ocr/extract/advanced

Content-Type: multipart/form-data

Documents over 10 pages are automatically processed asynchronously. Append ?async=true to force async processing for any request.

Parameters

| Parameter | Type | Required | Description |
|---|---|---|---|
| file | file | Yes | PDF or image file (PNG, JPEG, WebP) |
| output | string | No | simple or detailed. Default: simple. Use detailed to get word-level bounding boxes. |
| enhance | boolean | No | Enable AI post-processing to clean up OCR output. Default: false |

Code Examples

cURL
# Upload a scanned PDF as multipart/form-data and request detailed
# (word-level bounding box) output with AI post-processing disabled.
curl --request POST \
  --header "X-API-Key: YOUR_API_KEY" \
  --form "file=@scanned-document.pdf" \
  --form "output=detailed" \
  --form "enhance=false" \
  "https://app.alternapdf.com/api/v1/ocr/extract/advanced"
Python
import requests

# Endpoint and API-key header for the advanced (DocTR) OCR extraction call.
url = "https://app.alternapdf.com/api/v1/ocr/extract/advanced"
headers = {"X-API-Key": "YOUR_API_KEY"}

# Upload the document as multipart/form-data: `files` carries the PDF and
# `data` carries the plain form fields, so requests builds the multipart body.
with open("scanned-document.pdf", "rb") as f:
    files = {"file": ("scanned-document.pdf", f, "application/pdf")}
    # "detailed" asks for word-level bounding boxes; "enhance" stays off.
    data = {"output": "detailed", "enhance": "false"}
    response = requests.post(url, headers=headers, files=files, data=data)

# Fail fast on HTTP errors (4xx/5xx) instead of hitting a confusing KeyError
# below when the body is an error payload rather than an OCR result.
response.raise_for_status()
result = response.json()

# Access full text
print(result["data"]["text"])

# Iterate over pages and words with bounding boxes
# ("pages" is only present when output=detailed was requested)
for page in result["data"]["pages"]:
    print(f"\nPage {page['page_number']} ({page['dimensions']['width']}x{page['dimensions']['height']})")
    for word in page["words"]:
        bbox = word["bbox"]
        print(f"  '{word['text']}' confidence={word['confidence']:.2f} "
              f"at ({bbox['x_min']},{bbox['y_min']})-({bbox['x_max']},{bbox['y_max']})")
JavaScript
import { readFile } from "node:fs/promises";

// Run as an ES module (.mjs or "type": "module") on Node.js 18+: the script
// relies on top-level await and on the built-in fetch, FormData and Blob.
const ENDPOINT = "https://app.alternapdf.com/api/v1/ocr/extract/advanced";
const FILE_PATH = "scanned-document.pdf";

// Build the multipart body with the built-in FormData. The built-in fetch
// only serializes the platform FormData as multipart/form-data, so the file
// is wrapped in a Blob rather than streamed through a third-party form class.
const fileBytes = await readFile(FILE_PATH);
const form = new FormData();
form.append("file", new Blob([fileBytes], { type: "application/pdf" }), FILE_PATH);
form.append("output", "detailed");
form.append("enhance", "false");

const response = await fetch(ENDPOINT, {
  method: "POST",
  // Do not set Content-Type here: fetch generates it, including the
  // multipart boundary, from the FormData body.
  headers: {
    "X-API-Key": "YOUR_API_KEY",
  },
  body: form,
});

// Surface HTTP failures explicitly instead of failing later on a missing
// `data` property in an error payload.
if (!response.ok) {
  throw new Error(`OCR request failed: ${response.status} ${response.statusText}`);
}

const result = await response.json();

// Access full text
console.log(result.data.text);

// Iterate over pages and words with bounding boxes
// (`pages` is only present when output=detailed was requested)
for (const page of result.data.pages) {
  console.log(`\nPage ${page.page_number} (${page.dimensions.width}x${page.dimensions.height})`);
  for (const word of page.words) {
    const { x_min, y_min, x_max, y_max } = word.bbox;
    console.log(`  '${word.text}' confidence=${word.confidence.toFixed(2)} at (${x_min},${y_min})-(${x_max},${y_max})`);
  }
}

Response

With output=detailed, the response includes per-page word arrays with bounding box coordinates and confidence scores.

JSON Response (detailed output)
{
  "success": true,
  "data": {
    "text": "Invoice #12345\nDate: 2024-01-15\n\nTotal: $125.00",
    "pages": [
      {
        "page_number": 1,
        "text": "Invoice #12345\nDate: 2024-01-15\n\nTotal: $125.00",
        "dimensions": {
          "width": 2480,
          "height": 3508
        },
        "words": [
          {
            "text": "Invoice",
            "confidence": 0.98,
            "bbox": {
              "x_min": 120,
              "y_min": 85,
              "x_max": 380,
              "y_max": 125
            }
          },
          {
            "text": "#12345",
            "confidence": 0.95,
            "bbox": {
              "x_min": 395,
              "y_min": 85,
              "x_max": 560,
              "y_max": 125
            }
          },
          {
            "text": "Date:",
            "confidence": 0.97,
            "bbox": {
              "x_min": 120,
              "y_min": 150,
              "x_max": 220,
              "y_max": 185
            }
          },
          {
            "text": "2024-01-15",
            "confidence": 0.96,
            "bbox": {
              "x_min": 235,
              "y_min": 150,
              "x_max": 445,
              "y_max": 185
            }
          },
          {
            "text": "Total:",
            "confidence": 0.99,
            "bbox": {
              "x_min": 120,
              "y_min": 320,
              "x_max": 240,
              "y_max": 355
            }
          },
          {
            "text": "$125.00",
            "confidence": 0.94,
            "bbox": {
              "x_min": 255,
              "y_min": 320,
              "x_max": 410,
              "y_max": 355
            }
          }
        ]
      }
    ],
    "confidence": 0.96,
    "word_count": 6
  },
  "metadata": {
    "engine": "advanced",
    "processing_time_ms": 3420,
    "filename": "scanned-document.pdf"
  }
}

Response Fields

| Field | Type | Description |
|---|---|---|
| data.text | string | Extracted text from all pages |
| data.pages[] | array | Per-page results (detailed output only) |
| pages[].page_number | integer | 1-indexed page number |
| pages[].dimensions | object | Page width and height in pixels |
| pages[].words[] | array | Array of detected words with position data |
| words[].bbox | object | Bounding box with x_min, y_min, x_max, y_max (pixels) |
| words[].confidence | float | Per-word confidence score (0.0 to 1.0) |
| data.confidence | float | Overall OCR confidence score |
| metadata.engine | string | Always "advanced" for this endpoint |