Skip to main content
GET /tasks/{task_id}
Get Task
curl --request GET \
  --url https://api.chunkr.ai/tasks/{task_id} \
  --header 'Authorization: <api-key>'
{
  "completed": true,
  "configuration": {
    "chunk_processing": {
      "ignore_headers_and_footers": null,
      "target_length": 4096,
      "tokenizer": {
        "Enum": "Word"
      }
    },
    "error_handling": "Fail",
    "ocr_strategy": "All",
    "pipeline": "Chunkr",
    "segment_processing": {
      "Caption": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Footnote": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "FormRegion": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Formula": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "GraphicalItem": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Legend": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "LineNumber": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "ListItem": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Page": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "PageFooter": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "PageHeader": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "PageNumber": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Picture": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Table": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Text": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Title": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      },
      "Unknown": {
        "crop_image": "All",
        "description": null,
        "extended_context": null,
        "format": "Html",
        "llm": "<string>",
        "strategy": "LLM"
      }
    },
    "segmentation_strategy": "LayoutAnalysis"
  },
  "created_at": "2023-11-07T05:31:56Z",
  "file_info": {
    "url": "<string>",
    "mime_type": "<string>",
    "name": "<string>",
    "page_count": 1,
    "ss_cell_count": 1
  },
  "message": "<string>",
  "status": "Starting",
  "task_id": "<string>",
  "task_type": "Parse",
  "version_info": {
    "client_version": "Legacy",
    "server_version": "<string>"
  },
  "expires_at": "2023-11-07T05:31:56Z",
  "finished_at": "2023-11-07T05:31:56Z",
  "input_file_url": "<string>",
  "output": {
    "chunks": [
      {
        "chunk_length": 1,
        "segments": [
          {
            "bbox": {
              "height": 123,
              "left": 123,
              "top": 123,
              "width": 123
            },
            "page_height": 123,
            "page_number": 1,
            "page_width": 123,
            "segment_id": "<string>",
            "segment_type": "Caption",
            "confidence": 123,
            "content": "<string>",
            "description": "<string>",
            "embed": "<string>",
            "image": "<string>",
            "llm": "<string>",
            "ocr": [
              {
                "bbox": {
                  "height": 123,
                  "left": 123,
                  "top": 123,
                  "width": 123
                },
                "text": "<string>",
                "confidence": 123,
                "ocr_id": "<string>",
                "ss_cell_ref": "<string>"
              }
            ],
            "segment_length": 1,
            "ss_cells": [
              {
                "cell_id": "<string>",
                "range": "<string>",
                "text": "<string>",
                "formula": "<string>",
                "hyperlink": "<string>",
                "style": {
                  "align": "Left",
                  "bg_color": "<string>",
                  "font_face": "<string>",
                  "is_bold": true,
                  "text_color": "<string>",
                  "valign": "Top"
                },
                "value": "<string>"
              }
            ],
            "ss_header_bbox": {
              "height": 123,
              "left": 123,
              "top": 123,
              "width": 123
            },
            "ss_header_ocr": [
              {
                "bbox": {
                  "height": 123,
                  "left": 123,
                  "top": 123,
                  "width": 123
                },
                "text": "<string>",
                "confidence": 123,
                "ocr_id": "<string>",
                "ss_cell_ref": "<string>"
              }
            ],
            "ss_header_range": "<string>",
            "ss_header_text": "<string>",
            "ss_range": "<string>",
            "ss_sheet_name": "<string>",
            "text": "<string>"
          }
        ],
        "chunk_id": "<string>",
        "content": "<string>",
        "embed": "<string>"
      }
    ],
    "file_name": "<string>",
    "mime_type": "<string>",
    "page_count": 1,
    "pages": [
      {
        "image": "<string>",
        "page_height": 123,
        "page_number": 1,
        "page_width": 123,
        "dpi": 123,
        "ss_sheet_name": "<string>"
      }
    ],
    "pdf_url": "<string>"
  },
  "parse_task_id": "<string>",
  "started_at": "2023-11-07T05:31:56Z",
  "task_url": "<string>"
}

Authorizations

Authorization
string
header
required

Path Parameters

task_id
string | null
required

ID of the task to retrieve.

Query Parameters

base64_urls
boolean

Whether to return base64 encoded URLs. If false, the URLs will be returned as presigned URLs.

include_chunks
boolean

Whether to include chunks in the output response.

Response

Task details.

completed
boolean
required

True when the task has reached a terminal state, i.e. its status is Succeeded, Failed, or Cancelled.

configuration
Parse · object
required

Unified configuration type that can represent either parse or extract configurations

created_at
string<date-time>
required

The date and time when the task was created and queued.

file_info
object
required

Information about the input file.

message
string
required

A message describing the task's status or any errors that occurred.

status
enum<string>
required

The status of the task.

Available options:
Starting,
Processing,
Succeeded,
Failed,
Cancelled
task_id
string
required

The unique identifier for the task.

task_type
enum<string>
required
Available options:
Parse,
Extract
version_info
object
required

Version information for the task.

expires_at
string<date-time> | null

The date and time when the task will expire.

finished_at
string<date-time> | null

The date and time when the task was finished.

input_file_url
string | null
deprecated

The presigned URL of the input file. Deprecated: use file_info.url instead.

output
Parse · object

Unified output type that can represent either parse or extract results

parse_task_id
string | null

The ID of the source parse task that this task was created from.

started_at
string<date-time> | null

The date and time when the task was started.

task_url
string | null

The presigned URL of the task.