curl --request GET \
--url https://api.chunkr.ai/tasks/{task_id}/parse \
--header 'Authorization: <api-key>'
{
"completed": true,
"configuration": {
"chunk_processing": {
"ignore_headers_and_footers": null,
"target_length": 4096,
"tokenizer": {
"Enum": "Word"
}
},
"error_handling": "Fail",
"ocr_strategy": "All",
"pipeline": "Chunkr",
"segment_processing": {
"Caption": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Footnote": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"FormRegion": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Formula": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"GraphicalItem": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Legend": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"LineNumber": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"ListItem": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Page": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"PageFooter": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"PageHeader": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"PageNumber": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Picture": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Table": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Text": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Title": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Unknown": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
}
},
"segmentation_strategy": "LayoutAnalysis"
},
"created_at": "2023-11-07T05:31:56Z",
"file_info": {
"url": "<string>",
"mime_type": "<string>",
"name": "<string>",
"page_count": 1,
"ss_cell_count": 1
},
"message": "<string>",
"status": "Starting",
"task_id": "<string>",
"task_type": "Parse",
"version_info": {
"client_version": "Legacy",
"server_version": "<string>"
},
"expires_at": "2023-11-07T05:31:56Z",
"finished_at": "2023-11-07T05:31:56Z",
"input_file_url": "<string>",
"output": {
"chunks": [
{
"chunk_length": 1,
"segments": [
{
"bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"page_height": 123,
"page_number": 1,
"page_width": 123,
"segment_id": "<string>",
"segment_type": "Caption",
"confidence": 123,
"content": "<string>",
"description": "<string>",
"embed": "<string>",
"image": "<string>",
"llm": "<string>",
"ocr": [
{
"bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"text": "<string>",
"confidence": 123,
"ocr_id": "<string>",
"ss_cell_ref": "<string>"
}
],
"segment_length": 1,
"ss_cells": [
{
"cell_id": "<string>",
"range": "<string>",
"text": "<string>",
"formula": "<string>",
"hyperlink": "<string>",
"style": {
"align": "Left",
"bg_color": "<string>",
"font_face": "<string>",
"is_bold": true,
"text_color": "<string>",
"valign": "Top"
},
"value": "<string>"
}
],
"ss_header_bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"ss_header_ocr": [
{
"bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"text": "<string>",
"confidence": 123,
"ocr_id": "<string>",
"ss_cell_ref": "<string>"
}
],
"ss_header_range": "<string>",
"ss_header_text": "<string>",
"ss_range": "<string>",
"ss_sheet_name": "<string>",
"text": "<string>"
}
],
"chunk_id": "<string>",
"content": "<string>",
"embed": "<string>"
}
],
"file_name": "<string>",
"mime_type": "<string>",
"page_count": 1,
"pages": [
{
"image": "<string>",
"page_height": 123,
"page_number": 1,
"page_width": 123,
"dpi": 123,
"ss_sheet_name": "<string>"
}
],
"pdf_url": "<string>"
},
"started_at": "2023-11-07T05:31:56Z",
"task_url": "<string>"
}
Retrieves the current state of a parse task.
Returns task details such as processing status, configuration, output (when available), file metadata, and timestamps.
Typical uses:
curl --request GET \
--url https://api.chunkr.ai/tasks/{task_id}/parse \
--header 'Authorization: <api-key>'
{
"completed": true,
"configuration": {
"chunk_processing": {
"ignore_headers_and_footers": null,
"target_length": 4096,
"tokenizer": {
"Enum": "Word"
}
},
"error_handling": "Fail",
"ocr_strategy": "All",
"pipeline": "Chunkr",
"segment_processing": {
"Caption": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Footnote": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"FormRegion": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Formula": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"GraphicalItem": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Legend": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"LineNumber": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"ListItem": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Page": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"PageFooter": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"PageHeader": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"PageNumber": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Picture": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Table": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Text": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Title": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
},
"Unknown": {
"crop_image": "All",
"description": null,
"extended_context": null,
"format": "Html",
"llm": "<string>",
"strategy": "LLM"
}
},
"segmentation_strategy": "LayoutAnalysis"
},
"created_at": "2023-11-07T05:31:56Z",
"file_info": {
"url": "<string>",
"mime_type": "<string>",
"name": "<string>",
"page_count": 1,
"ss_cell_count": 1
},
"message": "<string>",
"status": "Starting",
"task_id": "<string>",
"task_type": "Parse",
"version_info": {
"client_version": "Legacy",
"server_version": "<string>"
},
"expires_at": "2023-11-07T05:31:56Z",
"finished_at": "2023-11-07T05:31:56Z",
"input_file_url": "<string>",
"output": {
"chunks": [
{
"chunk_length": 1,
"segments": [
{
"bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"page_height": 123,
"page_number": 1,
"page_width": 123,
"segment_id": "<string>",
"segment_type": "Caption",
"confidence": 123,
"content": "<string>",
"description": "<string>",
"embed": "<string>",
"image": "<string>",
"llm": "<string>",
"ocr": [
{
"bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"text": "<string>",
"confidence": 123,
"ocr_id": "<string>",
"ss_cell_ref": "<string>"
}
],
"segment_length": 1,
"ss_cells": [
{
"cell_id": "<string>",
"range": "<string>",
"text": "<string>",
"formula": "<string>",
"hyperlink": "<string>",
"style": {
"align": "Left",
"bg_color": "<string>",
"font_face": "<string>",
"is_bold": true,
"text_color": "<string>",
"valign": "Top"
},
"value": "<string>"
}
],
"ss_header_bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"ss_header_ocr": [
{
"bbox": {
"height": 123,
"left": 123,
"top": 123,
"width": 123
},
"text": "<string>",
"confidence": 123,
"ocr_id": "<string>",
"ss_cell_ref": "<string>"
}
],
"ss_header_range": "<string>",
"ss_header_text": "<string>",
"ss_range": "<string>",
"ss_sheet_name": "<string>",
"text": "<string>"
}
],
"chunk_id": "<string>",
"content": "<string>",
"embed": "<string>"
}
],
"file_name": "<string>",
"mime_type": "<string>",
"page_count": 1,
"pages": [
{
"image": "<string>",
"page_height": 123,
"page_number": 1,
"page_width": 123,
"dpi": 123,
"ss_sheet_name": "<string>"
}
],
"pdf_url": "<string>"
},
"started_at": "2023-11-07T05:31:56Z",
"task_url": "<string>"
}
ID of the task to retrieve.
Whether to return base64 encoded URLs. If false, the URLs will be returned as presigned URLs.
Whether to include chunks in the output response
Task details.
True when the task reaches a terminal state, i.e. when its status is Succeeded, Failed, or Cancelled.
Show child attributes
The date and time when the task was created and queued.
Information about the input file.
Show child attributes
A message describing the task's status or any errors that occurred.
The status of the task.
Allowed status values: Starting, Processing, Succeeded, Failed, Cancelled.
The unique identifier for the task.
Allowed task_type values: Parse, Extract.
Version information for the task.
Show child attributes
The date and time when the task will expire.
The date and time when the task was finished.
The presigned URL of the input file.
Deprecated: use file_info.url instead.
The processed results of a document parsing task
Show child attributes
The date and time when the task was started.
The presigned URL of the task.