TouAI

Unstructured Data

Use the Unstructured Data service to process documents and videos, manage uploaded files, and enable semantic retrieval.

Supported Formats

formats = client.unstructured.formats()
print(formats.categories)
 
category = client.unstructured.detect("report.pdf")
print(category)

Document Jobs

job = client.unstructured.jobs.create(
    source={"source_type": "url", "url": "https://example.com/report.pdf"},
    options={"chunking": {"enabled": True, "chunk_size": 1000}},
    webhook_url="https://your-app.com/webhook",
)
 
result = client.unstructured.jobs.wait_until_complete(
    job.job_id,
    poll_interval=2.0,
    timeout=300.0,
)
 
job_status = client.unstructured.jobs.get(job.job_id)
job_result = client.unstructured.jobs.result(job.job_id)
jobs = client.unstructured.jobs.list()
client.unstructured.jobs.cancel(job.job_id)

Batch Processing

batch = client.unstructured.jobs.create_batch(
    files=[
        {"source_type": "url", "url": "https://example.com/doc1.pdf"},
        {"source_type": "url", "url": "https://example.com/doc2.docx"},
        {"source_type": "storage", "bucket": "my-bucket", "path": "reports/q1.pdf"},
    ],
    options={"chunking": {"enabled": True}},
)
 
result = client.unstructured.jobs.wait_batch_complete(
    batch.batch_id,
    poll_interval=3.0,
    timeout=600.0,
)
 
batches = client.unstructured.jobs.list_batches(page=1, page_size=20)

| Method | Return Type | Description |
| --- | --- | --- |
| jobs.create(...) | Job | Start a single processing job |
| jobs.get(job_id) | Job | Get job status |
| jobs.list() | list[Job] | List all jobs |
| jobs.result(job_id) | dict | Get processing result |
| jobs.cancel(job_id) | dict | Cancel a running job |
| jobs.create_batch(...) | BatchJob | Start a batch processing job |
| jobs.get_batch(batch_id) | BatchJob | Get batch job status |
| jobs.list_batches(...) | dict | List batch jobs |

File Management

upload = client.unstructured.files.upload("report.pdf")
 
file = client.unstructured.files.create(
    file_id=upload["file_id"],
    filename="report.pdf",
    file_size_bytes=102400,
)
 
file = client.unstructured.files.import_from_storage(
    storage_path="reports/q1.pdf",
    bucket="my-bucket",
)
 
files = client.unstructured.files.list()
detail = client.unstructured.files.get(file.file_id)
viewer = client.unstructured.files.viewer(file.file_id)
job = client.unstructured.files.process(file.file_id)
url = client.unstructured.files.download_url(file.file_id)
client.unstructured.files.delete(file.file_id)

Video Search and Chat

results = client.unstructured.video.search(
    "product demo walkthrough",
    top_k=10,
    similarity_threshold=0.7,
    rerank=True,
    max_per_video=3,
)
 
for event in client.unstructured.video.chat(
    [{"role": "user", "content": "Summarize the key points in this video"}],
    stream=True,
):
    print(event.data)
 
response = client.unstructured.video.chat(
    [{"role": "user", "content": "What products are shown?"}],
    stream=False,
)

Video Embeddings

stats = client.unstructured.video.embeddings.stats()
coverage = client.unstructured.video.embeddings.coverage()
client.unstructured.video.embeddings.backfill(max_files=10)
client.unstructured.video.embeddings.delete("video_id")
Context Layer | Deep Research