# Insert two texts in one request; file_sources supplies a name for each text.
curl -X POST https://vedaya-kge.fly.dev/documents/texts \
  -H "Content-Type: application/json" \
  -d '{
    "texts": [
      "Document about AI and machine learning...",
      "Document about neural networks..."
    ],
    "file_sources": ["doc_1.txt", "doc_2.txt"]
  }'
{
  "status": "success",
  "message": "Successfully inserted 2 texts. Processing will continue in background."
}

Document Management API

The Document Management API provides endpoints for uploading files, inserting text, scanning for new documents, and managing your document collection.

Quick Start

import requests

# Upload text documents (most common method)
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    json={
        "texts": ["Your document content here..."],
        "file_sources": ["doc_1.txt"]  # Optional
    }
)

# Check processing status
status = requests.get("https://vedaya-kge.fly.dev/documents/pipeline_status").json()
print(f"Processing: {'busy' if status.get('busy') else 'complete'}")

Insert Multiple Texts (Primary Method)

The most commonly used endpoint for uploading documents to Vedaya.
texts
array
required
List of text contents to insert into the RAG system
file_sources
array
Optional sources/names for the texts
# Insert two texts in one request; file_sources supplies a name for each text.
curl -X POST https://vedaya-kge.fly.dev/documents/texts \
  -H "Content-Type: application/json" \
  -d '{
    "texts": [
      "Document about AI and machine learning...",
      "Document about neural networks..."
    ],
    "file_sources": ["doc_1.txt", "doc_2.txt"]
  }'
{
  "status": "success",
  "message": "Successfully inserted 2 texts. Processing will continue in background."
}

Working Example

import requests
import time

# No authentication required
documents = [
    "Document 1: Your content about topic A...",
    "Document 2: Related information about topic B..."
]

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    headers={"Content-Type": "application/json"},
    json={
        "texts": documents,
        "file_sources": [f"doc_{i}.txt" for i in range(len(documents))]
    }
)

# Stop on an HTTP error instead of polling for an upload that never happened
response.raise_for_status()
print("✅ Documents uploaded successfully")

# Wait for processing (usually takes seconds, not minutes)
for _ in range(10):
    status = requests.get(
        "https://vedaya-kge.fly.dev/documents/pipeline_status"
    ).json()

    if not status.get('busy', False):
        print("✅ Processing complete!")
        break

    # Report the message that was just fetched, then wait before the next poll
    print(f"Processing: {status.get('latest_message', '...')}")
    time.sleep(2)
else:
    # No break occurred: the pipeline reported busy on every poll
    print("Processing is still running; check pipeline_status again later.")

Response

Returns an InsertResponse with the operation status:
  • status: “success”, “partial_success”, or “failure”
  • message: Details about the operation

Get Pipeline Status

Monitor document processing status.
busy
boolean
Whether pipeline is currently processing
latest_message
string
Current processing status message
docs
integer
Total documents being processed
cur_batch
integer
Current batch number
job_name
string
Current job name (e.g., “indexing files”)
curl https://vedaya-kge.fly.dev/documents/pipeline_status
{
  "busy": false,
  "job_name": "indexing texts",
  "docs": 10,
  "batchs": 2,
  "cur_batch": 2,
  "latest_message": "Processing complete",
  "request_pending": false
}

Example - Monitor Processing

import requests
import time

def wait_for_processing(max_wait=60, poll_interval=2, request_timeout=10):
    """Poll the pipeline status endpoint until it stops reporting busy.

    Args:
        max_wait: Approximate total number of seconds to spend waiting
            between polls.
        poll_interval: Seconds to sleep between polls. Must be positive.
        request_timeout: Seconds to wait for each status request before
            giving up on it.

    Returns:
        True as soon as the pipeline reports it is not busy, False if it was
        still busy on every poll (or if max_wait allows no polls at all).

    Raises:
        ValueError: If poll_interval is not positive.
        requests.HTTPError: If the status endpoint returns an error response.
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be positive")

    for _ in range(int(max_wait // poll_interval)):
        response = requests.get(
            "https://vedaya-kge.fly.dev/documents/pipeline_status",
            # Without a timeout a stalled connection could block past max_wait
            timeout=request_timeout,
        )
        # An error body has no 'busy' key and would otherwise read as "done"
        response.raise_for_status()
        status = response.json()

        if not status.get('busy', False):
            return True

        # Show the message that was just fetched, then wait for the next poll
        print(f"Status: {status.get('latest_message', 'Processing...')}")
        time.sleep(poll_interval)

    return False

# Use after uploading documents
if wait_for_processing():
    print("Ready to query!")

Insert Single Text

For single document uploads.
text
string
required
The text content to insert
file_source
string
Optional source identifier
# Insert a single text; file_source is an optional source identifier.
curl -X POST https://vedaya-kge.fly.dev/documents/text \
  -H "Content-Type: application/json" \
  -d '{
    "text": "Your document content here...",
    "file_source": "document.txt"
  }'
{
  "status": "success",
  "message": "Text inserted successfully"
}

Example

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/text",
    json={
        "text": "Your single document content here...",
        "file_source": "document.txt"
    }
)

Upload Files

Upload actual files (PDF, DOCX, TXT, etc.) to the system.
file
file
required
The file to process (PDF, DOCX, TXT, MD, etc.)
# Upload one file as multipart form data in the "file" field.
curl -X POST https://vedaya-kge.fly.dev/documents/file \
  -F "file=@document.pdf"
{
  "status": "success",
  "message": "File 'document.pdf' uploaded successfully. Processing will continue in background."
}

Example with File Upload

with open('document.pdf', 'rb') as f:
    response = requests.post(
        "https://vedaya-kge.fly.dev/documents/file",
        files={'file': f}
    )

Batch Upload Files

Process multiple files at once.
files
array[file]
required
List of files to process (PDF, DOCX, TXT, MD, etc.)
# Upload several files at once; repeat the "files" field once per file.
curl -X POST https://vedaya-kge.fly.dev/documents/file_batch \
  -F "files=@document1.pdf" \
  -F "files=@document2.txt" \
  -F "files=@document3.docx"
{
  "status": "success",
  "message": "Successfully uploaded 3 files. Processing will continue in background.",
  "files": [
    "document1.pdf",
    "document2.txt",
    "document3.docx"
  ]
}

Get Document Status

View all documents and their processing status.
statuses
object
Documents grouped by processing status
curl https://vedaya-kge.fly.dev/documents
{
  "statuses": {
    "pending": [],
    "processing": [],
    "processed": [
      {
        "id": "doc_123456",
        "content_summary": "Research paper on machine learning",
        "content_length": 15240,
        "status": "processed",
        "created_at": "2025-03-31T12:34:56",
        "updated_at": "2025-03-31T12:35:30",
        "chunks_count": 12,
        "file_path": "research_paper.pdf"
      }
    ],
    "failed": []
  }
}

Example

response = requests.get("https://vedaya-kge.fly.dev/documents")
docs_status = response.json()

# The per-status lists live under the top-level "statuses" key; iterating the
# response itself would only yield that single key.
for status, docs in docs_status["statuses"].items():
    print(f"{status}: {len(docs)} documents")

Clear All Documents

Delete all documents and related data from the system. Warning: This action cannot be undone!
curl -X DELETE https://vedaya-kge.fly.dev/documents
{
  "status": "success",
  "message": "All documents cleared successfully. Deleted 15 files."
}

Example

# Warning: This deletes everything!
response = requests.delete("https://vedaya-kge.fly.dev/documents")
print(response.json()['message'])

Clear Cache

Clear the LLM response cache for specific modes or all modes.
modes
array
Cache modes to clear. Options: “naive”, “local”, “global”, “hybrid”, “default”, “mix”. If not specified, clears all cache.
# Clear the cache for the listed modes only.
curl -X POST https://vedaya-kge.fly.dev/documents/clear_cache \
  -H "Content-Type: application/json" \
  -d '{"modes": ["hybrid", "global"]}'
{
  "status": "success",
  "message": "Successfully cleared cache for modes: ['hybrid', 'global']"
}

Example

# Clear specific cache modes
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/clear_cache",
    json={"modes": ["hybrid", "global"]}
)

Complete Workflow Example

import requests
import time
from openai import OpenAI

# 1. Upload documents
documents = [
    "Document about machine learning concepts...",
    "Document about neural networks..."
]

upload = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    json={"texts": documents}
)
# Fail early: there is nothing to wait for or query if the upload was rejected
upload.raise_for_status()

# 2. Wait for processing (usually seconds)
print("Processing documents...")
for _ in range(15):
    status = requests.get(
        "https://vedaya-kge.fly.dev/documents/pipeline_status"
    ).json()

    if not status.get('busy'):
        print("✅ Processing complete!")
        break
    time.sleep(2)
else:
    # No break occurred: the pipeline reported busy on every poll
    print("Processing is still running; results may be incomplete.")

# 3. Query using OpenAI-compatible interface
client = OpenAI(
    api_key="sk-dummy",  # Dummy key works
    base_url="https://vedaya-kge.fly.dev/v1"
)

# Separate name so the chat completion is not confused with the HTTP upload
completion = client.chat.completions.create(
    model="vedaya-hybrid",
    messages=[{"role": "user", "content": "What topics are covered?"}],
    max_tokens=300
)

print(completion.choices[0].message.content)

Important Notes

  • Processing is fast - Documents typically process in seconds, not minutes
  • Text upload preferred - Use /documents/texts for best results
  • File formats - Supports PDF, DOCX, TXT, MD, and many other formats
  • Automatic extraction - PDFs and DOCX files are automatically converted to text

Optional Authentication

If you want to use authentication (not required):
# Optional: Add authorization header
headers = {"Content-Type": "application/json"}
if API_KEY:  # Only if you have a real key
    headers["Authorization"] = f"Bearer {API_KEY}"

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    headers=headers,
    json={"texts": documents}
)