Document Management API
The Document Management API provides endpoints for uploading files, inserting text, scanning for new documents, and managing your document collection.
Quick Start
import requests
# Upload text documents (most common method)
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    json={
        "texts": ["Your document content here..."],
        "file_sources": ["doc_1.txt"],  # Optional
    },
)

# Check processing status: the pipeline reports "busy" while it is still working
status = requests.get("https://vedaya-kge.fly.dev/documents/pipeline_status").json()
print(f"Processing: {'busy' if status.get('busy') else 'complete'}")
Insert Multiple Texts (Primary Method)
The most commonly used endpoint for uploading documents to Vedaya.
texts: List of text contents to insert into the RAG system
file_sources: Optional sources/names for the texts
# Insert two texts in one request; "file_sources" is optional.
# Each line ends in a single backslash so the shell continues the command.
curl -X POST https://vedaya-kge.fly.dev/documents/texts \
  -H "Content-Type: application/json" \
  -d '{
    "texts": [
      "Document about AI and machine learning...",
      "Document about neural networks..."
    ],
    "file_sources": ["doc_1.txt", "doc_2.txt"]
  }'
{
"status" : "success" ,
"message" : "Successfully inserted 2 texts. Processing will continue in background."
}
Working Example
import requests
import time
# No authentication required
documents = [
    "Document 1: Your content about topic A...",
    "Document 2: Related information about topic B...",
]

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    headers={"Content-Type": "application/json"},
    json={
        "texts": documents,
        # One generated source name per document: doc_0.txt, doc_1.txt, ...
        "file_sources": [f"doc_{i}.txt" for i in range(len(documents))],
    },
)

if response.status_code == 200:
    print("✅ Documents uploaded successfully")

    # Wait for processing (usually takes seconds, not minutes)
    for _ in range(10):
        status = requests.get(
            "https://vedaya-kge.fly.dev/documents/pipeline_status"
        ).json()
        if not status.get('busy', False):
            print("✅ Processing complete!")
            break
        time.sleep(2)
        print(f"Processing: {status.get('latest_message', '...')}")
    else:
        # The loop ran out of attempts without seeing busy == False.
        print("Processing is still running; check pipeline_status again later.")
else:
    # Report a rejected upload instead of ending silently.
    print(f"Upload failed with HTTP {response.status_code}")
Response
Returns an InsertResponse with the operation status:
status: “success”, “partial_success”, or “failure”
message: Details about the operation
Get Pipeline Status
Monitor document processing status.
busy: Whether the pipeline is currently processing
latest_message: Current processing status message
docs: Total documents being processed
job_name: Current job name (e.g., “indexing files”)
curl https://vedaya-kge.fly.dev/documents/pipeline_status
{
"busy" : false ,
"job_name" : "indexing texts" ,
"docs" : 10 ,
"batchs" : 2 ,
"cur_batch" : 2 ,
"latest_message" : "Processing complete" ,
"request_pending" : false
}
Example - Monitor Processing
import requests
import time
def wait_for_processing(max_wait=60, poll_interval=2):
    """Wait for document processing to complete.

    Polls the pipeline status endpoint until it stops reporting ``busy``
    or until roughly ``max_wait`` seconds have elapsed.

    Args:
        max_wait: Maximum time to wait, in seconds.
        poll_interval: Seconds to sleep between two status polls.

    Returns:
        True if the pipeline reported that it is no longer busy, False if
        the wait timed out first.

    Raises:
        requests.RequestException: If a status request fails, times out,
            or comes back with an HTTP error status.
    """
    deadline = time.monotonic() + max_wait
    while True:
        response = requests.get(
            "https://vedaya-kge.fly.dev/documents/pipeline_status",
            timeout=10,  # a hung connection must not outlive max_wait indefinitely
        )
        # Without this check an error payload that lacks "busy" would be
        # mistaken for "processing finished".
        response.raise_for_status()
        status = response.json()
        if not status.get('busy', False):
            return True
        print(f"Status: {status.get('latest_message', 'Processing...')}")
        # Give up now rather than sleeping past the deadline.
        if time.monotonic() + poll_interval > deadline:
            return False
        time.sleep(poll_interval)
# Use after uploading documents
if wait_for_processing():
    print("Ready to query!")
Insert Single Text
For single document uploads.
text: The text content to insert
file_source: Optional source identifier
# Insert a single text; "file_source" is optional.
# Each line ends in a single backslash so the shell continues the command.
curl -X POST https://vedaya-kge.fly.dev/documents/text \
  -H "Content-Type: application/json" \
  -d '{
    "text": "Your document content here...",
    "file_source": "document.txt"
  }'
{
"status" : "success" ,
"message" : "Text inserted successfully"
}
Example
# Send one document; "file_source" is the optional source identifier.
single_doc = {
    "text": "Your single document content here...",
    "file_source": "document.txt",
}
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/text",
    json=single_doc,
)
Upload Files
Upload actual files (PDF, DOCX, TXT, etc.) to the system.
file: The file to process (PDF, DOCX, TXT, MD, etc.)
# Upload one file as multipart form data (form field name: "file").
curl -X POST https://vedaya-kge.fly.dev/documents/file \
  -F "file=@document.pdf"
{
"status" : "success" ,
"message" : "File 'document.pdf' uploaded successfully. Processing will continue in background."
}
Example with File Upload
# Open in binary mode; the request must be sent while the file is still open,
# so it lives inside the with-block.
with open('document.pdf', 'rb') as f:
    response = requests.post(
        "https://vedaya-kge.fly.dev/documents/file",
        files={'file': f},
    )
Batch Upload Files
Process multiple files at once.
files: List of files to process (PDF, DOCX, TXT, MD, etc.)
# Upload several files at once; repeat the "files" form field once per file.
curl -X POST https://vedaya-kge.fly.dev/documents/file_batch \
  -F "files=@document1.pdf" \
  -F "files=@document2.txt" \
  -F "files=@document3.docx"
{
"status" : "success" ,
"message" : "Successfully uploaded 3 files. Processing will continue in background." ,
"files" : [
"document1.pdf" ,
"document2.txt" ,
"document3.docx"
]
}
Get Document Status
View all documents and their processing status.
statuses: Documents grouped by processing status
pending: Documents waiting to be processed
processing: Documents currently being processed
processed: Successfully processed documents
failed: Documents that failed to process
curl https://vedaya-kge.fly.dev/documents
{
"statuses" : {
"pending" : [],
"processing" : [],
"processed" : [
{
"id" : "doc_123456" ,
"content_summary" : "Research paper on machine learning" ,
"content_length" : 15240 ,
"status" : "processed" ,
"created_at" : "2025-03-31T12:34:56" ,
"updated_at" : "2025-03-31T12:35:30" ,
"chunks_count" : 12 ,
"file_path" : "research_paper.pdf"
}
],
"failed" : []
}
}
Example
response = requests.get("https://vedaya-kge.fly.dev/documents")
docs_status = response.json()

# The per-status document lists are nested under the top-level "statuses" key,
# so iterate that mapping rather than the response object itself.
for status, docs in docs_status["statuses"].items():
    print(f"{status}: {len(docs)} documents")
Clear All Documents
Delete all documents and related data from the system. Warning: This action cannot be undone!
curl -X DELETE https://vedaya-kge.fly.dev/documents
{
"status" : "success" ,
"message" : "All documents cleared successfully. Deleted 15 files."
}
Example
# Warning: This deletes everything!
response = requests.delete("https://vedaya-kge.fly.dev/documents")
result = response.json()
print(result["message"])
Clear Cache
Clear the LLM response cache for specific modes or all modes.
modes: Cache modes to clear. Options: “naive”, “local”, “global”, “hybrid”, “default”, “mix”.
If not specified, all cache is cleared.
# Clear the cache for the listed modes only.
curl -X POST https://vedaya-kge.fly.dev/documents/clear_cache \
  -H "Content-Type: application/json" \
  -d '{"modes": ["hybrid", "global"]}'
{
"status" : "success" ,
"message" : "Successfully cleared cache for modes: ['hybrid', 'global']"
}
Example
# Clear specific cache modes
modes_to_clear = ["hybrid", "global"]
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/clear_cache",
    json={"modes": modes_to_clear},
)
Complete Workflow Example
import requests
import time
from openai import OpenAI
# 1. Upload documents
documents = [
    "Document about machine learning concepts...",
    "Document about neural networks...",
]
response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    json={"texts": documents},
)
# Stop on a rejected upload instead of polling and querying for nothing.
response.raise_for_status()

# 2. Wait for processing (usually seconds)
print("Processing documents...")
for _ in range(15):
    status = requests.get(
        "https://vedaya-kge.fly.dev/documents/pipeline_status"
    ).json()
    if not status.get('busy'):
        print("✅ Processing complete!")
        break
    time.sleep(2)

# 3. Query using OpenAI-compatible interface
client = OpenAI(
    api_key="sk-dummy",  # Dummy key works
    base_url="https://vedaya-kge.fly.dev/v1",
)
response = client.chat.completions.create(
    model="vedaya-hybrid",
    messages=[{"role": "user", "content": "What topics are covered?"}],
    max_tokens=300,
)
print(response.choices[0].message.content)
Important Notes
Processing is fast - Documents typically process in seconds, not minutes
Text upload preferred - Use /documents/texts for best results
File formats - Supports PDF, DOCX, TXT, MD, and many other formats
Automatic extraction - PDFs and DOCX files are automatically converted to text
Optional Authentication
If you want to use authentication (not required):
# Optional: Add authorization header
headers = {"Content-Type": "application/json"}
if API_KEY:  # Only if you have a real key
    # No padding inside the f-string: the header value must be exactly "Bearer <key>".
    headers["Authorization"] = f"Bearer {API_KEY}"

response = requests.post(
    "https://vedaya-kge.fly.dev/documents/texts",
    headers=headers,
    json={"texts": documents},
)