Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion env.example
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ SUMMARY_LANGUAGE=English
###############################
### Concurrency Configuration
###############################
### Max concurrency requests of LLM (for both query and document processing)
### Max concurrency requests of LLM (default for all roles; override per role with MAX_ASYNC_EXTRACT_LLM, MAX_ASYNC_KEYWORD_LLM, MAX_ASYNC_QUERY_LLM)
MAX_ASYNC=4
### Number of documents processed in parallel (between 2 and 10; MAX_ASYNC/3 is recommended)
MAX_PARALLEL_INSERT=2
Expand Down Expand Up @@ -268,6 +268,57 @@ LLM_MODEL=gpt-5-mini
### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000

###############################
### Per-Role LLM Configuration
### Separate LLM functions for extract (entity extraction), keyword (keyword extraction), and query (answer generation).
### Each role inherits from the base LLM config above. Only set overrides as needed.
### Each role gets its own independent concurrency queue.
###############################

### Extract role (entity/relation extraction + description summarization)
# EXTRACT_LLM_BINDING=openai
# EXTRACT_LLM_MODEL=gpt-4o
# EXTRACT_LLM_BINDING_HOST=https://api.openai.com/v1
# EXTRACT_LLM_BINDING_API_KEY=your_api_key
# MAX_ASYNC_EXTRACT_LLM=8
# LLM_TIMEOUT_EXTRACT_LLM=240

### Keyword role (query keyword extraction)
# KEYWORD_LLM_BINDING=openai
# KEYWORD_LLM_MODEL=gpt-4o-mini
# KEYWORD_LLM_BINDING_HOST=https://api.openai.com/v1
# KEYWORD_LLM_BINDING_API_KEY=your_api_key
# MAX_ASYNC_KEYWORD_LLM=4
# LLM_TIMEOUT_KEYWORD_LLM=60

### Query role (final answer generation, bypass mode, Ollama API direct calls)
# QUERY_LLM_BINDING=openai
# QUERY_LLM_MODEL=gpt-4o
# QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
# QUERY_LLM_BINDING_API_KEY=your_api_key
# MAX_ASYNC_QUERY_LLM=12
# LLM_TIMEOUT_QUERY_LLM=300

### Per-role provider options
### Override provider-specific parameters per role. Format: {ROLE}_{PROVIDER_PREFIX}_{FIELD}
### Same provider: inherits from base options, only set overrides.
### Cross provider: starts from provider defaults, set all needed options.
###
### OpenAI examples (applies to openai and azure_openai bindings):
# EXTRACT_OPENAI_LLM_TEMPERATURE=0.1
# EXTRACT_OPENAI_LLM_MAX_COMPLETION_TOKENS=6000
# KEYWORD_OPENAI_LLM_TEMPERATURE=0.0
# QUERY_OPENAI_LLM_REASONING_EFFORT=high
###
### Ollama examples:
# EXTRACT_OLLAMA_LLM_NUM_CTX=65536
# EXTRACT_OLLAMA_LLM_TEMPERATURE=0.1
# KEYWORD_OLLAMA_LLM_NUM_CTX=16384
###
### Gemini examples:
# EXTRACT_GEMINI_LLM_TEMPERATURE=0.1
# QUERY_GEMINI_LLM_MAX_OUTPUT_TOKENS=8192

### Azure OpenAI example
### Use deployment name as model name or set AZURE_OPENAI_DEPLOYMENT instead
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
Expand Down
41 changes: 41 additions & 0 deletions lightrag/api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,47 @@ def parse_args() -> argparse.Namespace:
ollama_server_infos.LIGHTRAG_NAME = args.simulated_model_name
ollama_server_infos.LIGHTRAG_TAG = args.simulated_model_tag

# Per-role LLM configuration
# Extract role
args.extract_llm_binding = get_env_value("EXTRACT_LLM_BINDING", None)
args.extract_llm_model = get_env_value("EXTRACT_LLM_MODEL", None)
args.extract_llm_binding_host = get_env_value("EXTRACT_LLM_BINDING_HOST", None)
args.extract_llm_binding_api_key = get_env_value(
"EXTRACT_LLM_BINDING_API_KEY", None
)
args.max_async_extract_llm = get_env_value(
"MAX_ASYNC_EXTRACT_LLM", None, int, special_none=True
)
args.llm_timeout_extract_llm = get_env_value(
"LLM_TIMEOUT_EXTRACT_LLM", None, int, special_none=True
)

# Keyword role
args.keyword_llm_binding = get_env_value("KEYWORD_LLM_BINDING", None)
args.keyword_llm_model = get_env_value("KEYWORD_LLM_MODEL", None)
args.keyword_llm_binding_host = get_env_value("KEYWORD_LLM_BINDING_HOST", None)
args.keyword_llm_binding_api_key = get_env_value(
"KEYWORD_LLM_BINDING_API_KEY", None
)
args.max_async_keyword_llm = get_env_value(
"MAX_ASYNC_KEYWORD_LLM", None, int, special_none=True
)
args.llm_timeout_keyword_llm = get_env_value(
"LLM_TIMEOUT_KEYWORD_LLM", None, int, special_none=True
)

# Query role
args.query_llm_binding = get_env_value("QUERY_LLM_BINDING", None)
args.query_llm_model = get_env_value("QUERY_LLM_MODEL", None)
args.query_llm_binding_host = get_env_value("QUERY_LLM_BINDING_HOST", None)
args.query_llm_binding_api_key = get_env_value("QUERY_LLM_BINDING_API_KEY", None)
args.max_async_query_llm = get_env_value(
"MAX_ASYNC_QUERY_LLM", None, int, special_none=True
)
args.llm_timeout_query_llm = get_env_value(
"LLM_TIMEOUT_QUERY_LLM", None, int, special_none=True
)

return args


Expand Down
Loading
Loading