# Example request body for a RAG query endpoint, assembled from named
# sections for readability; the merged dict is identical to a flat literal.
retrieval_settings = {
    "top_k": 30,
    "max_token_for_text_unit": 4000,
    "max_token_for_global_context": 4000,
    "max_token_for_local_context": 4000,
}
llm_settings = {
    "llm_provider": "openai",  # or "anthropic", "ollama", etc.
    "llm_model": "gpt-4",
    "llm_temperature": 0.7,
    "llm_api_key": "sk-...",  # Optional, uses server default if not provided
}
response_settings = {
    "response_type": "Bullet Points",
    "only_need_context": False,  # Set True to get only retrieved context
    "only_need_prompt": False,  # Set True to get only the prompt
    "disable_llm_generation": False,  # Set True for retrieval only
}
conversation_settings = {
    "conversation_history": [
        {"role": "user", "content": "What is AI?"},
        {"role": "assistant", "content": "AI is..."},
    ],
    "history_turns": 2,  # How many turns to consider
}
# Merge order reproduces the original key order exactly.
payload = {
    "query": "Explain deep learning architectures",
    "mode": "global",
    **retrieval_settings,
    **llm_settings,
    **response_settings,
    **conversation_settings,
}
response = requests.post(url, headers=headers, json=payload)