Skip to main content

Document Management API

The Document Management API provides endpoints for uploading files, inserting text, scanning for new documents, and managing your document collection.

Quick Start

import requests

# Upload text documents (most common method)
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    json={
        "texts": ["Your document content here..."],
        "file_sources": ["doc_1.txt"]  # Optional
    }
)

# Check processing status
status = requests.get("https://vedaya-kge.fly.dev/documents/pipeline_status").json()
print(f"Processing: {'busy' if status.get('busy') else 'complete'}")

Insert Multiple Texts (Primary Method)

The most commonly used endpoint for uploading documents to Vedaya.
texts
array
required
List of text contents to insert into the RAG system
file_sources
array
Optional sources/names for the texts
# Insert two texts in one request; a single trailing backslash continues the line
curl -X POST https://vedaya-kge.fly.dev/documents/texts \
  -H "Content-Type: application/json" \
  -d '{
    "texts": [
      "Document about AI and machine learning...",
      "Document about neural networks..."
    ],
    "file_sources": ["doc_1.txt", "doc_2.txt"]
  }'
{
  "status": "success",
  "message": "Successfully inserted 2 texts. Processing will continue in background."
}

Working Example

import requests
import time

# No authentication required
documents = [
    "Document 1: Your content about topic A...",
    "Document 2: Related information about topic B...",
]

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    headers={"Content-Type": "application/json"},
    json={
        "texts": documents,
        # Optional source names for the texts
        "file_sources": [f"doc_{i}.txt" for i, _ in enumerate(documents)],
    },
    timeout=30,
)

# Stop on an HTTP error instead of polling for an upload that never happened
response.raise_for_status()
print("✅ Documents uploaded successfully")

# Wait for processing (usually takes seconds, not minutes)
for _ in range(10):
    status = requests.get(
        "https://vedaya-kge.fly.dev/documents/pipeline_status",
        timeout=10,
    ).json()

    if not status.get('busy', False):
        print("✅ Processing complete!")
        break

    # Report progress before sleeping so the message reflects the latest poll
    print(f"Processing: {status.get('latest_message', '...')}")
    time.sleep(2)
else:
    # Every attempt saw busy == True; say so rather than ending silently
    print("⚠️ Still processing after 20 seconds; check pipeline_status again later")

Response

Returns an InsertResponse with the operation status:
  • status: “success”, “partial_success”, or “failure”
  • message: Details about the operation

Get Pipeline Status

Monitor document processing status.
busy
boolean
Whether pipeline is currently processing
latest_message
string
Current processing status message
docs
integer
Total documents being processed
cur_batch
integer
Current batch number
job_name
string
Current job name (e.g., “indexing files”)
curl https://vedaya-kge.fly.dev/documents/pipeline_status
{
  "busy": false,
  "job_name": "indexing texts",
  "docs": 10,
  "batchs": 2,
  "cur_batch": 2,
  "latest_message": "Processing complete",
  "request_pending": false
}

Example - Monitor Processing

import requests
import time

def wait_for_processing(max_wait=60, poll_interval=2):
    """Wait for document processing to complete.

    Polls the pipeline status endpoint until it reports that it is no
    longer busy or the wait budget is used up.

    Args:
        max_wait: Approximate maximum time to wait, in seconds.
        poll_interval: Seconds to sleep between status checks; must be
            positive.

    Returns:
        True as soon as the pipeline is not busy, False if it is still
        busy after the last check.

    Raises:
        ValueError: If ``poll_interval`` is not positive.
        requests.RequestException: If a status request fails, times out,
            or returns an HTTP error status.
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be positive")

    # Always check at least once, even when max_wait < poll_interval
    attempts = max(1, int(max_wait // poll_interval))
    for attempt in range(attempts):
        response = requests.get(
            "https://vedaya-kge.fly.dev/documents/pipeline_status",
            timeout=10,  # a hung connection must not block far past max_wait
        )
        # An error response would likely lack 'busy' and otherwise read as "done"
        response.raise_for_status()
        status = response.json()

        if not status.get('busy', False):
            return True

        print(f"Status: {status.get('latest_message', 'Processing...')}")
        # No point sleeping after the final check
        if attempt < attempts - 1:
            time.sleep(poll_interval)

    return False

# Use after uploading documents
if wait_for_processing():
    print("Ready to query!")

Insert Single Text

For single document uploads.
text
string
required
The text content to insert
file_source
string
Optional source identifier
# Insert a single text; a single trailing backslash continues the line
curl -X POST https://vedaya-kge.fly.dev/documents/text \
  -H "Content-Type: application/json" \
  -d '{
    "text": "Your document content here...",
    "file_source": "document.txt"
  }'
{
  "status": "success",
  "message": "Text inserted successfully"
}

Example

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/text",
    json={
        "text": "Your single document content here...",
        "file_source": "document.txt"
    }
)

Upload Files

Upload actual files (PDF, DOCX, TXT, etc.) to the system.
file
file
required
The file to process (PDF, DOCX, TXT, MD, etc.)
# -F "file=@<path>" sends the file's contents as the multipart form field "file"
curl -X POST https://vedaya-kge.fly.dev/documents/file \
  -F "file=@document.pdf"
{
  "status": "success",
  "message": "File 'document.pdf' uploaded successfully. Processing will continue in background."
}

Example with File Upload

with open('document.pdf', 'rb') as f:
    response = requests.post(
        "https://vedaya-kge.fly.dev/documents/file",
        files={'file': f}
    )

Batch Upload Files

Process multiple files at once.
files
array[file]
required
List of files to process (PDF, DOCX, TXT, MD, etc.)
# Repeat the "files" form field once per file to upload several in one request
curl -X POST https://vedaya-kge.fly.dev/documents/file_batch \
  -F "files=@document1.pdf" \
  -F "files=@document2.txt" \
  -F "files=@document3.docx"
{
  "status": "success",
  "message": "Successfully uploaded 3 files. Processing will continue in background.",
  "files": [
    "document1.pdf",
    "document2.txt",
    "document3.docx"
  ]
}

Get Document Status

View all documents and their processing status.
statuses
object
Documents grouped by processing status
curl https://vedaya-kge.fly.dev/documents
{
  "statuses": {
    "pending": [],
    "processing": [],
    "processed": [
      {
        "id": "doc_123456",
        "content_summary": "Research paper on machine learning",
        "content_length": 15240,
        "status": "processed",
        "created_at": "2025-03-31T12:34:56",
        "updated_at": "2025-03-31T12:35:30",
        "chunks_count": 12,
        "file_path": "research_paper.pdf"
      }
    ],
    "failed": []
  }
}

Example

response = requests.get("https://vedaya-kge.fly.dev/documents")
docs_status = response.json()

# The per-status lists are nested under the top-level "statuses" key;
# iterating docs_status itself would only yield that single key.
for status, docs in docs_status.get("statuses", {}).items():
    print(f"{status}: {len(docs)} documents")

Clear All Documents

Delete all documents and related data from the system. Warning: This action cannot be undone!
curl -X DELETE https://vedaya-kge.fly.dev/documents
{
  "status": "success",
  "message": "All documents cleared successfully. Deleted 15 files."
}

Example

# Warning: This deletes everything!
response = requests.delete("https://vedaya-kge.fly.dev/documents")
print(response.json()['message'])

Clear Cache

Clear the LLM response cache for specific modes or all modes.
modes
array
Cache modes to clear. Options: “naive”, “local”, “global”, “hybrid”, “default”, “mix”. If not specified, clears all cache.
# Clear the cache for the listed modes only
curl -X POST https://vedaya-kge.fly.dev/documents/clear_cache \
  -H "Content-Type: application/json" \
  -d '{"modes": ["hybrid", "global"]}'
{
  "status": "success",
  "message": "Successfully cleared cache for modes: ['hybrid', 'global']"
}

Example

# Clear specific cache modes
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/clear_cache",
    json={"modes": ["hybrid", "global"]}
)

Complete Workflow Example

import requests
import time
from openai import OpenAI

# 1. Upload documents
documents = [
    "Document about machine learning concepts...",
    "Document about neural networks...",
]

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    json={"texts": documents},
    timeout=30,
)
# Fail fast: there is nothing to wait for or query if the upload was rejected
response.raise_for_status()

# 2. Wait for processing (usually seconds)
print("Processing documents...")
for _ in range(15):
    status = requests.get(
        "https://vedaya-kge.fly.dev/documents/pipeline_status",
        timeout=10,
    ).json()

    if not status.get('busy'):
        print("✅ Processing complete!")
        break
    time.sleep(2)
else:
    # All 15 polls saw a busy pipeline; warn instead of continuing silently
    print("⚠️ Still processing after 30 seconds; answers may be incomplete")

# 3. Query using OpenAI-compatible interface
client = OpenAI(
    api_key="sk-dummy",  # Dummy key works
    base_url="https://vedaya-kge.fly.dev/v1"
)

response = client.chat.completions.create(
    model="vedaya-hybrid",
    messages=[{"role": "user", "content": "What topics are covered?"}],
    max_tokens=300
)

print(response.choices[0].message.content)

Important Notes

  • Processing is fast - Documents typically process in seconds, not minutes
  • Text upload preferred - Use /documents/texts for best results
  • File formats - Supports PDF, DOCX, TXT, MD, and many other formats
  • Automatic extraction - PDFs and DOCX files are automatically converted to text

Optional Authentication

If you want to use authentication (not required):
# Optional: Add authorization header
headers = {"Content-Type": "application/json"}
if API_KEY:  # Only if you have a real key
    headers["Authorization"] = f"Bearer {API_KEY}"

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    headers=headers,
    json={"texts": documents}
)