Primary Method: OpenAI-Compatible Interface

The recommended way to query your knowledge base is through the OpenAI-compatible interface:
from openai import OpenAI

# Setup client pointing to Vedaya
client = OpenAI(
    api_key="sk-dummy",  # API works without real authentication
    base_url="https://vedaya-kge.fly.dev/v1"
)

# Query your knowledge base
response = client.chat.completions.create(
    model="vedaya-hybrid",  # Special Vedaya model for RAG
    messages=[
        {"role": "user", "content": "What is machine learning?"}
    ],
    temperature=0.7,
    max_tokens=500
)

print(response.choices[0].message.content)

Available RAG Models

Use special model names to control RAG behavior:
Model Name    | Description                                        | Best For
vedaya-hybrid | Combined entity + relationship retrieval (default) | General queries
vedaya-naive  | Basic keyword search                               | Simple fact retrieval
vedaya-local  | Entity-focused retrieval                           | Finding specific entities
vedaya-global | Relationship-focused retrieval                     | Understanding connections
vedaya-bypass | Direct LLM without retrieval                       | No RAG needed
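
A quick way to compare modes is to send the same question under each model name. A minimal sketch, reusing the client configured above:
# Compare how each retrieval mode answers the same question
question = "How do the main topics in my documents relate?"
for model in ["vedaya-naive", "vedaya-local", "vedaya-global", "vedaya-hybrid"]:
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": question}],
        max_tokens=200
    )
    print(f"--- {model} ---")
    print(response.choices[0].message.content)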

Multi-Turn Conversations

The chat endpoint is stateless; maintain context across turns by appending each exchange to the messages list and resending the full history with every request:
messages = []

# First question
messages.append({"role": "user", "content": "What is deep learning?"})
response = client.chat.completions.create(
    model="vedaya-hybrid",
    messages=messages,
    max_tokens=300
)
answer = response.choices[0].message.content
messages.append({"role": "assistant", "content": answer})
print(f"Assistant: {answer}")

# Follow-up question (maintains context)
messages.append({"role": "user", "content": "Can you give me an example?"})
response = client.chat.completions.create(
    model="vedaya-hybrid",
    messages=messages,
    max_tokens=300
)
print(f"Assistant: {response.choices[0].message.content}")

HTTP Fallback Method

If the OpenAI SDK is unavailable or fails, send the request directly over HTTP:
import requests

response = requests.post(
    "https://vedaya-kge.fly.dev/v1/chat/completions",
    headers={"Content-Type": "application/json"},
    json={
        "model": "vedaya-hybrid",
        "messages": [
            {"role": "user", "content": "What is machine learning?"}
        ],
        "temperature": 0.7,
        "max_tokens": 500
    }
)

if response.status_code == 200:
    answer = response.json()['choices'][0]['message']['content']
    print(answer)
else:
    print(f"Error: {response.status_code} - {response.text}")

Alternative: Native Query Endpoint

You can also use the native /query endpoint for more control:
import requests

# Optional authentication (API works without it)
headers = {"Content-Type": "application/json"}
# headers["Authorization"] = f"Bearer {API_KEY}"  # If you have a key

payload = {
    "query": "What is machine learning?",
    "mode": "hybrid",  # Options: naive, local, global, hybrid
    "top_k": 20,
    "response_type": "Multiple Paragraphs"
}

response = requests.post(
    "https://vedaya-kge.fly.dev/query",
    headers=headers,
    json=payload
)

response.raise_for_status()  # Fail loudly on HTTP errors
result = response.json()
print(result["response"])

Advanced Query Options

payload = {
    "query": "Explain deep learning architectures",
    "mode": "global",
    
    # Retrieval settings
    "top_k": 30,
    "max_token_for_text_unit": 4000,
    "max_token_for_global_context": 4000,
    "max_token_for_local_context": 4000,
    
    # LLM configuration
    "llm_provider": "openai",  # or "anthropic", "ollama", etc.
    "llm_model": "gpt-4",
    "llm_temperature": 0.7,
    "llm_api_key": "sk-...",  # Optional, uses server default if not provided
    
    # Response control
    "response_type": "Bullet Points",
    "only_need_context": False,  # Set True to get only retrieved context
    "only_need_prompt": False,   # Set True to get only the prompt
    "disable_llm_generation": False,  # Set True for retrieval only
    
    # Conversation context
    "conversation_history": [
        {"role": "user", "content": "What is AI?"},
        {"role": "assistant", "content": "AI is..."}
    ],
    "history_turns": 2  # How many turns to consider
}

response = requests.post(
    "https://vedaya-kge.fly.dev/query",
    headers=headers,  # same headers as the basic example above
    json=payload
)
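
The only_need_context flag above enables retrieval-only inspection, which is useful for debugging what the knowledge graph actually returns. A minimal sketch, assuming the retrieved context comes back in the same "response" field as a generated answer:
# Inspect retrieved context without generating an answer
payload = {
    "query": "Explain deep learning architectures",
    "mode": "hybrid",
    "top_k": 10,
    "only_need_context": True  # Return retrieved context instead of an answer
}

response = requests.post(
    "https://vedaya-kge.fly.dev/query",
    headers={"Content-Type": "application/json"},
    json=payload
)
print(response.json()["response"])  # Retrieved context (assumed field)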

Complete Working Example

Here's a practical helper that tries the OpenAI SDK first and falls back to direct HTTP:
def query_knowledge_base(question, mode="vedaya-hybrid"):
    """Query the knowledge base with automatic fallback"""
    
    # Try OpenAI SDK first
    try:
        from openai import OpenAI
        client = OpenAI(
            api_key="sk-dummy",
            base_url="https://vedaya-kge.fly.dev/v1"
        )
        
        response = client.chat.completions.create(
            model=mode,
            messages=[{"role": "user", "content": question}],
            temperature=0.7,
            max_tokens=500
        )
        return response.choices[0].message.content
        
    except Exception:
        # Fallback to direct HTTP
        import requests
        response = requests.post(
            "https://vedaya-kge.fly.dev/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json={
                "model": mode,
                "messages": [{"role": "user", "content": question}],
                "temperature": 0.7,
                "max_tokens": 500
            }
        )
        if response.status_code == 200:
            return response.json()['choices'][0]['message']['content']
        return f"Error: {response.status_code}"

# Usage examples
answer = query_knowledge_base("What are the main topics?")
answer = query_knowledge_base("Find all entities", mode="vedaya-local")
answer = query_knowledge_base("Show relationships", mode="vedaya-global")

Important Notes

  • Streaming is not available - the streaming endpoint returns 404, so request complete responses instead (see the sketch after this list)
  • Processing is fast - documents typically process in seconds, not minutes
  • Model names matter - use the vedaya-* prefixed models to select RAG modes
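
Because streaming fails, guard any code that sets stream=True. A minimal sketch, reusing the client from above and assuming the 404 surfaces as an exception from the SDK:
try:
    # Expected to fail: the streaming endpoint returns 404
    stream = client.chat.completions.create(
        model="vedaya-hybrid",
        messages=[{"role": "user", "content": "What is machine learning?"}],
        stream=True
    )
    for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")
except Exception:
    # Fall back to a regular, non-streaming request
    response = client.chat.completions.create(
        model="vedaya-hybrid",
        messages=[{"role": "user", "content": "What is machine learning?"}]
    )
    print(response.choices[0].message.content)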

Response Types

Control output format with the response_type field (a short example follows the list):
  • "Multiple Paragraphs"
  • "Single Paragraph"
  • "Bullet Points"
  • "Numbered List"
  • "Brief Summary"
  • "Technical Analysis"
  • "Detailed Explanation"
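
These strings go in the response_type field of the native /query payload. A short sketch requesting the same answer in two formats:
import requests

for response_type in ["Bullet Points", "Brief Summary"]:
    payload = {
        "query": "What is machine learning?",
        "mode": "hybrid",
        "response_type": response_type
    }
    response = requests.post(
        "https://vedaya-kge.fly.dev/query",
        headers={"Content-Type": "application/json"},
        json=payload
    )
    print(f"--- {response_type} ---")
    print(response.json()["response"])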