Multimodal Perception
Use the Multimodal Perception service to process documents and videos, manage uploaded files, and enable semantic retrieval.
Supported Formats
formats = client.unstructured.formats()
print(formats.categories)
category = client.unstructured.detect("report.pdf")
print(category)
Document Jobs
job = client.unstructured.jobs.create(
source={"source_type": "url", "url": "https://example.com/report.pdf"},
options={"chunking": {"enabled": True, "chunk_size": 1000}},
webhook_url="https://your-app.com/webhook",
)
result = client.unstructured.jobs.wait_until_complete(
job.job_id,
poll_interval=2.0,
timeout=300.0,
)
job_status = client.unstructured.jobs.get(job.job_id)
job_result = client.unstructured.jobs.result(job.job_id)
jobs = client.unstructured.jobs.list()
client.unstructured.jobs.cancel(job.job_id)
Batch Processing
batch = client.unstructured.jobs.create_batch(
files=[
{"source_type": "url", "url": "https://example.com/doc1.pdf"},
{"source_type": "url", "url": "https://example.com/doc2.docx"},
{"source_type": "storage", "bucket": "my-bucket", "path": "reports/q1.pdf"},
],
options={"chunking": {"enabled": True}},
)
result = client.unstructured.jobs.wait_batch_complete(
batch.batch_id,
poll_interval=3.0,
timeout=600.0,
)
batches = client.unstructured.jobs.list_batches(page=1, page_size=20)

| Method | Return Type | Description |
|---|---|---|
| jobs.create(...) | Job | Start a single processing job |
| jobs.get(job_id) | Job | Get job status |
| jobs.list() | list[Job] | List all jobs |
| jobs.result(job_id) | dict | Get processing result |
| jobs.cancel(job_id) | dict | Cancel a running job |
| jobs.create_batch(...) | BatchJob | Start a batch processing job |
| jobs.get_batch(batch_id) | BatchJob | Get batch job status |
| jobs.list_batches(...) | dict | List batch jobs |
File Management
One-call upload (recommended)
files.upload_file(path) transfers the local file to the playground and
creates the file row in one call. It auto-selects the transport based on
file size: a single PUT for files smaller than multipart_threshold
(default 25 MiB), and a parallel multipart upload for larger files (8 MiB
parts, 8 concurrent streams by default). If any part fails, the SDK aborts
the multipart session so partial bytes don't accumulate.
file = client.unstructured.files.upload_file(
"report.pdf",
# multipart_threshold=25 * 1024 * 1024, # default
# part_size=8 * 1024 * 1024, # default
# max_concurrency=8, # default
)
client.unstructured.files.process(file.file_id)
Requires unstructured-api v1.8.19+ for the multipart path (smaller
files always work). Pass multipart_threshold=0 to force multipart.
Low-level upload (manual two-step)
Use this when you want to drive the PUT yourself (browser uploads,
custom retry logic). files.upload returns the presigned URL only; you
PUT the bytes and then call files.create to register the row.
upload = client.unstructured.files.upload("report.pdf")
file = client.unstructured.files.create(
file_id=upload["file_id"],
filename="report.pdf",
file_size_bytes=102400,
)
file = client.unstructured.files.import_from_storage(
bucket_id="bkt_01H...",
key="reports/q1.pdf",
filename="q1.pdf",
# org_id, file_size_bytes, mime_type are optional;
# workspace context is resolved server-side from bucket_id.
)
files = client.unstructured.files.list()
detail = client.unstructured.files.get(file.file_id)
viewer = client.unstructured.files.viewer(file.file_id)
# files.process kicks off processing and returns the updated file
# record (status: "processing"). Poll files.get to track completion.
file = client.unstructured.files.process(file.file_id)
while file.status in ("queued", "processing"):
file = client.unstructured.files.get(file.file_id)
url = client.unstructured.files.download_url(file.file_id)
client.unstructured.files.delete(file.file_id)
Low-level multipart upload
When you need resumable uploads, custom concurrency, or to drive the
transfer from a browser, call the multipart endpoints directly. The
upload_token returned from init is an HMAC bound to the caller's
JWT subject — it must be echoed back on every complete / abort.
init = client.unstructured.files.upload_multipart_init(
filename="movie.mp4",
file_size_bytes=size,
part_size=8 * 1024 * 1024, # 8 MiB; AWS floor is 5 MiB
)
# PUT each chunk to init.parts[i].upload_url, capture the ETag from
# the response, then echo back here.
client.unstructured.files.upload_multipart_complete(
file_id=init.file_id,
upload_id=init.upload_id,
storage_path=init.storage_path,
upload_token=init.upload_token,
parts=[{"part_number": 1, "etag": "..."}],
)
# On error, abort to release any buffered parts immediately.
client.unstructured.files.upload_multipart_abort(
file_id=init.file_id,
upload_id=init.upload_id,
storage_path=init.storage_path,
upload_token=init.upload_token,
)
Requires unstructured-api v1.8.19+. After complete, register the
playground row with files.create(file_id, filename, file_size_bytes)
and kick off processing with files.process(file_id).
Video Search and Chat
results = client.unstructured.video.search(
"product demo walkthrough",
top_k=10,
similarity_threshold=0.7,
rerank=True,
max_per_video=3,
)
for event in client.unstructured.video.chat(
[{"role": "user", "content": "Summarize the key points in this video"}],
stream=True,
):
print(event.data)
response = client.unstructured.video.chat(
[{"role": "user", "content": "What products are shown?"}],
stream=False,
)
Video Embeddings
stats = client.unstructured.video.embeddings.stats()
coverage = client.unstructured.video.embeddings.coverage()
client.unstructured.video.embeddings.backfill(max_files=10)
client.unstructured.video.embeddings.delete("video_id")