Back to API Docs
Advanced OCR (DocTR)
Neural network-based OCR powered by DocTR. Higher accuracy than Tesseract with word-level bounding boxes and per-word confidence scores. Ideal for structured document processing where you need spatial data.
POST
https://app.alternapdf.com/api/v1/ocr/extract/advanced
Content-Type: multipart/form-data
Documents over 10 pages are automatically processed asynchronously. Append ?async=true to force async processing for any request.
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| file | file | Yes | PDF or image file (PNG, JPEG, WebP) |
| output | string | No | `simple` or `detailed`. Default: `simple`. Use `detailed` to get word-level bounding boxes. |
| enhance | boolean | No | Enable AI post-processing to clean up OCR output. Default: `false` |
Code Examples
cURL
curl -X POST "https://app.alternapdf.com/api/v1/ocr/extract/advanced" \
-H "X-API-Key: YOUR_API_KEY" \
-F "file=@scanned-document.pdf" \
-F "output=detailed" \
-F "enhance=false"
Python
import requests
url = "https://app.alternapdf.com/api/v1/ocr/extract/advanced"
headers = {"X-API-Key": "YOUR_API_KEY"}
with open("scanned-document.pdf", "rb") as f:
files = {"file": ("scanned-document.pdf", f, "application/pdf")}
data = {"output": "detailed", "enhance": "false"}
response = requests.post(url, headers=headers, files=files, data=data)
result = response.json()
# Access full text
print(result["data"]["text"])
# Iterate over pages and words with bounding boxes
for page in result["data"]["pages"]:
print(f"\nPage {page['page_number']} ({page['dimensions']['width']}x{page['dimensions']['height']})")
for word in page["words"]:
bbox = word["bbox"]
print(f" '{word['text']}' confidence={word['confidence']:.2f} "
f"at ({bbox['x_min']},{bbox['y_min']})-({bbox['x_max']},{bbox['y_max']})")
JavaScript
const fs = require("fs");
const FormData = require("form-data");
const form = new FormData();
form.append("file", fs.createReadStream("scanned-document.pdf"));
form.append("output", "detailed");
form.append("enhance", "false");
const response = await fetch("https://app.alternapdf.com/api/v1/ocr/extract/advanced", {
method: "POST",
headers: {
"X-API-Key": "YOUR_API_KEY",
...form.getHeaders(),
},
body: form,
});
const result = await response.json();
// Access full text
console.log(result.data.text);
// Iterate over pages and words with bounding boxes
for (const page of result.data.pages) {
console.log(`\nPage ${page.page_number} (${page.dimensions.width}x${page.dimensions.height})`);
for (const word of page.words) {
const { x_min, y_min, x_max, y_max } = word.bbox;
console.log(` '${word.text}' confidence=${word.confidence.toFixed(2)} at (${x_min},${y_min})-(${x_max},${y_max})`);
}
}
Response
With output=detailed, the response includes per-page word arrays with bounding box coordinates and confidence scores.
JSON Response (detailed output)
{
"success": true,
"data": {
"text": "Invoice #12345\nDate: 2024-01-15\n\nTotal: $125.00",
"pages": [
{
"page_number": 1,
"text": "Invoice #12345\nDate: 2024-01-15\n\nTotal: $125.00",
"dimensions": {
"width": 2480,
"height": 3508
},
"words": [
{
"text": "Invoice",
"confidence": 0.98,
"bbox": {
"x_min": 120,
"y_min": 85,
"x_max": 380,
"y_max": 125
}
},
{
"text": "#12345",
"confidence": 0.95,
"bbox": {
"x_min": 395,
"y_min": 85,
"x_max": 560,
"y_max": 125
}
},
{
"text": "Date:",
"confidence": 0.97,
"bbox": {
"x_min": 120,
"y_min": 150,
"x_max": 220,
"y_max": 185
}
},
{
"text": "2024-01-15",
"confidence": 0.96,
"bbox": {
"x_min": 235,
"y_min": 150,
"x_max": 445,
"y_max": 185
}
},
{
"text": "Total:",
"confidence": 0.99,
"bbox": {
"x_min": 120,
"y_min": 320,
"x_max": 240,
"y_max": 355
}
},
{
"text": "$125.00",
"confidence": 0.94,
"bbox": {
"x_min": 255,
"y_min": 320,
"x_max": 410,
"y_max": 355
}
}
]
}
],
"confidence": 0.96,
"word_count": 6
},
"metadata": {
"engine": "advanced",
"processing_time_ms": 3420,
"filename": "scanned-document.pdf"
}
}
Response Fields
| Field | Type | Description |
|---|---|---|
| data.text | string | Extracted text from all pages |
| data.pages[] | array | Per-page results (detailed output only) |
| pages[].page_number | integer | 1-indexed page number |
| pages[].dimensions | object | Page width and height in pixels |
| pages[].words[] | array | Array of detected words with position data |
| words[].bbox | object | Bounding box with x_min, y_min, x_max, y_max (pixels) |
| words[].confidence | float | Per-word confidence score (0.0 to 1.0) |
| data.confidence | float | Overall OCR confidence score |
| metadata.engine | string | Always "advanced" for this endpoint |