HKUDS · danielaskdd · Feb 7, 2026 · Feb 7, 2026 · Feb 7, 2026 · Mar 8, 2026
diff --git a/docker-compose-full.yml b/docker-compose-full.yml
@@ -14,7 +14,6 @@ services:
     volumes:
       - ./data/rag_storage:/app/data/rag_storage
       - ./data/inputs:/app/data/inputs
-      - ./config.ini:/app/config.ini
       - ./.env:/app/.env
     deploy:
       restart_policy:

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -11,7 +11,6 @@ services:
     volumes:
       - ./data/rag_storage:/app/data/rag_storage
       - ./data/inputs:/app/data/inputs
-      - ./config.ini:/app/config.ini
       - ./.env:/app/.env
     deploy:
       restart_policy:

diff --git a/docs/RAGAnythingParserAlignment.md b/docs/RAGAnythingParserAlignment.md
@@ -0,0 +1,91 @@
+## RAG-Anything Parser Alignment Notes
+
+This document summarizes the companion changes made on the `RAG-Anything` side to better align its parser output with the LightRAG multimodal pipeline introduced in this PR.
+
+These notes are provided as reviewer context. The code changes described below live in the `RAG-Anything` repository, mainly in `raganything/parser.py`, rather than in this LightRAG pull request.
+
+## Why This Alignment Was Needed
+
+The LightRAG-side pipeline in this PR expects parser output to preserve heading structure, normalize multimodal block types consistently, and expose enough table metadata to generate correct LightRAG document sidecars.
+
+Without the parser-side alignment, several downstream issues appear more easily:
+
+- section headings may be lost before LightRAG sidecar generation
+- table dimensions can degrade to `[0, 0]`
+- table content may be harder to serialize into stable sidecar payloads
+- parser output shape may drift between Docling variants
+
+## RAG-Anything Changes
+
+### 1. Add safe helper functions for parser normalization
+
+Two small helpers were added:
+
+- `_to_int(value, default=0)`
+- `_grid_to_rows(grid)`
+
+Their purpose is to make parser output more defensive and consistent when Docling returns numeric fields or table cell structures in slightly different formats.
+
+### 2. Normalize text labels before branching
+
+Docling text blocks are now normalized through:
+
+- `label = str(block.get("label", "")).strip().lower()`
+
+This avoids relying on a raw case-sensitive label and makes formula / title / section-header detection more stable.
+
+### 3. Preserve section heading structure explicitly
+
+For Docling text blocks, `section_header` and `title` are now emitted as dedicated structured blocks:
+
+- `type: "section_header"` or `type: "title"`
+- `text`
+- `level`
+- `page_idx`
+
+This is important because the LightRAG-side conversion logic uses heading information to:
+
+- propagate `heading`
+- build `parent_headings`
+- keep multimodal sidecars attached to the correct section context
+
+### 4. Preserve label and level on normal text blocks
+
+For non-heading text blocks, the parser now also retains:
+
+- `label`
+- `level`
+
+This gives LightRAG more context when converting parser output into LightRAG document blocks and helps preserve document structure more faithfully.
+
+### 5. Improve table normalization for Docling output
+
+Table parsing was expanded to support both:
+
+- dict-style table payloads with `grid`, `num_rows`, `num_cols`
+- legacy list-style table payloads
+
+The parser now derives and exposes:
+
+- `table_body`
+- `rows`
+- `num_rows`
+- `num_cols`
+
+This is the key alignment needed for LightRAG-side table sidecar generation, especially to avoid empty dimensions and to keep table content serializable in a stable form.
+
+## Practical Impact on This PR
+
+These RAG-Anything parser changes are the external counterpart of the LightRAG work in this PR:
+
+- LightRAG now converts structured parser output into LightRAG document artifacts
+- multimodal sidecars depend on parser-side heading and table metadata
+- heading propagation and table dimension fixes are more reliable when the parser emits normalized structure upstream
+
+In short, the LightRAG code in this PR can run independently, but the best end-to-end behavior for Docling/RAG-Anything-driven multimodal ingestion depends on this parser alignment on the `RAG-Anything` side as well.
+
+## Scope Note
+
+This document is intentionally limited to parser-alignment notes for `RAG-Anything`.
+
+It does not describe the entity disambiguation experiment, which is explicitly excluded from this PR.
diff --git a/env.example b/env.example
@@ -188,9 +188,6 @@ SUMMARY_LANGUAGE=English
 ### Note: If using Nginx as reverse proxy, also configure client_max_body_size
 # MAX_UPLOAD_SIZE=104857600
 
-### Entity types that the LLM will attempt to recognize
-# ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
-
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -206,6 +203,51 @@ SUMMARY_LANGUAGE=English
 ### Maximum token size allowed for entity extraction input context
 # MAX_EXTRACT_INPUT_TOKENS=20480
 
+### Per-response cap on total entity+relationship rows/records emitted by the LLM
+# MAX_EXTRACTION_RECORDS=100
+### Per-response cap on entity rows/objects emitted by the LLM
+# MAX_EXTRACTION_ENTITIES=40
+
+### Use JSON structured output for entity extraction (default: false; JSON is slower but more reliable)
+ENTITY_EXTRACTION_USE_JSON=true
+
+### Multimodal parsing/analyze integration
+### Optional parser routing rules, for example:
+###   pdf:mineru-iet,docx:docling,pptx:docling,*:native
+# LIGHTRAG_PARSER=
+### Optional local checkout path of RAG-Anything for parser integration
+# RAGANYTHING_ROOT=/path/to/RAG-Anything
+### Retry count for multimodal VLM analysis JSON normalization/writeback
+# VLM_ANALYZE_RETRIES=2
+### Maximum image bytes sent to VLM per multimodal item
+# VLM_MAX_IMAGE_BYTES=5242880
+
+### Async parser service protocol (optional)
+### Configure these when using remote MinerU/Docling async services
+# MINERU_ENDPOINT=http://localhost:8000/api/v1/task
+# MINERU_POLL_ENDPOINT=http://localhost:8000/api/v1/task/{trace_id}
+# MINERU_POLL_METHOD=GET
+# MINERU_ID_FIELD=trace_id
+# MINERU_STATUS_FIELD=status
+# MINERU_RESULT_URL_FIELD=result_url
+# MINERU_CONTENT_FIELD=content
+# MINERU_SUCCESS_VALUES=done,success,completed
+# MINERU_FAILED_VALUES=failed,error,cancelled
+# MINERU_POLL_INTERVAL_SECONDS=2
+# MINERU_MAX_POLLS=180
+
+# DOCLING_ENDPOINT=http://localhost:8081/v1/convert/file/async
+# DOCLING_POLL_ENDPOINT=http://localhost:8081/v1/convert/file/async/{task_id}
+# DOCLING_POLL_METHOD=GET
+# DOCLING_ID_FIELD=task_id
+# DOCLING_STATUS_FIELD=status
+# DOCLING_RESULT_URL_FIELD=result_url
+# DOCLING_CONTENT_FIELD=content
+# DOCLING_SUCCESS_VALUES=done,success,completed
+# DOCLING_FAILED_VALUES=failed,error,cancelled
+# DOCLING_POLL_INTERVAL_SECONDS=2
+# DOCLING_MAX_POLLS=180
+
 ### control the maximum chunk_ids stored in vector and graph db
 # MAX_SOURCE_IDS_PER_ENTITY=300
 # MAX_SOURCE_IDS_PER_RELATION=300
@@ -227,6 +269,14 @@ SUMMARY_LANGUAGE=English
 MAX_ASYNC=4
 ### Number of parallel processing documents(between 2~10, MAX_ASYNC/3 is recommended)
 MAX_PARALLEL_INSERT=2
+### Optional per-stage document pipeline concurrency
+# MAX_PARALLEL_PARSE_NATIVE=5
+# MAX_PARALLEL_PARSE_MINERU=3
+# MAX_PARALLEL_PARSE_DOCLING=3
+# MAX_PARALLEL_ANALYZE=2
+### Optional queue sizes for staged pipeline workers
+# QUEUE_SIZE_DEFAULT=100
+# QUEUE_SIZE_INSERT=4
 ### Max concurrency requests for Embedding
 # EMBEDDING_FUNC_MAX_ASYNC=8
 ### Num of chunks send to Embedding in single request
@@ -235,7 +285,7 @@ MAX_PARALLEL_INSERT=2
 ###########################################################################
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
-### LLM_BINDING_HOST: Service endpoint (left empty if using default endpoint provided by openai or gemini SDK)
+### LLM_BINDING_HOST: Service endpoint (leave empty to use the provider SDK's default endpoint)
 ### LLM_BINDING_API_KEY: api key
 ### If LightRAG deployed in Docker:
 ###    uses host.docker.internal instead of localhost in LLM_BINDING_HOST
@@ -262,7 +312,7 @@ LLM_MODEL=gpt-5-mini
 # OPENAI_LLM_TEMPERATURE=0.9
 ### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
 ### Typically, max_tokens does not include prompt content
-### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
+### For vLLM/SGLang and most OpenAI-compatible API providers
 # OPENAI_LLM_MAX_TOKENS=9000
 ### For OpenAI o1-mini or newer modles utilizes max_completion_tokens instead of max_tokens
 # OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
@@ -283,8 +333,9 @@ LLM_MODEL=gpt-5-mini
 
 ### Google Gemini example (AI Studio)
 # # LLM_BINDING=gemini
+### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
+# # LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
 # # LLM_BINDING_API_KEY=your_gemini_api_key
-# # LLM_BINDING_HOST=https://generativelanguage.googleapis.com
 # # LLM_MODEL=gemini-flash-latest
 
 ### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
@@ -298,7 +349,6 @@ LLM_MODEL=gpt-5-mini
 
 ### Google Vertex AI example
 ### Vertex AI use GOOGLE_APPLICATION_CREDENTIALS instead of API-KEY for authentication
-### LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT means select endpoit based on project and location automatically
 # # LLM_BINDING=gemini
 # # LM_BINDING_HOST=https://aiplatform.googleapis.com
 ### or use DEFAULT_GEMINI_ENDPOINT to select endpoint based on project and location automatically
@@ -329,13 +379,52 @@ OLLAMA_LLM_NUM_CTX=32768
 ### Bedrock uses AWS credentials from the environment / AWS credential chain.
 ### It does not use LLM_BINDING_API_KEY.
 # # LLM_BINDING=aws_bedrock
+# # LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+# # Or set LLM_BINDING_HOST to a custom Bedrock-compatible proxy/gateway URL
 # # LLM_MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0
 # AWS_ACCESS_KEY_ID=your_aws_access_key_id
 # AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
 # AWS_SESSION_TOKEN=your_optional_aws_session_token
 # AWS_REGION=us-east-1
 # BEDROCK_LLM_TEMPERATURE=1.0
 
+###########################################################################
+### Optional role-specific LLM/VLM overrides
+### If unset, each role falls back to the base LLM_* configuration above.
+### Available roles: EXTRACT, KEYWORD, QUERY, VLM
+###########################################################################
+### Example: use a dedicated model/provider for entity extraction
+# EXTRACT_LLM_BINDING=openai
+# EXTRACT_LLM_MODEL=your_extract_model
+# EXTRACT_LLM_BINDING_HOST=https://api.example.com/v1
+# EXTRACT_LLM_BINDING_API_KEY=your_extract_api_key
+# MAX_ASYNC_EXTRACT_LLM=4
+# LLM_TIMEOUT_EXTRACT_LLM=180
+
+### Example: use a dedicated model/provider for keyword extraction
+# KEYWORD_LLM_BINDING=openai
+# KEYWORD_LLM_MODEL=your_keyword_model
+# KEYWORD_LLM_BINDING_HOST=https://api.example.com/v1
+# KEYWORD_LLM_BINDING_API_KEY=your_keyword_api_key
+# MAX_ASYNC_KEYWORD_LLM=4
+# LLM_TIMEOUT_KEYWORD_LLM=180
+
+### Example: use a dedicated model/provider for query answering
+# QUERY_LLM_BINDING=openai
+# QUERY_LLM_MODEL=your_query_model
+# QUERY_LLM_BINDING_HOST=https://api.example.com/v1
+# QUERY_LLM_BINDING_API_KEY=your_query_api_key
+# MAX_ASYNC_QUERY_LLM=4
+# LLM_TIMEOUT_QUERY_LLM=180
+
+### Example: use a dedicated model/provider for multimodal analysis
+# VLM_LLM_BINDING=openai
+# VLM_LLM_MODEL=your_vlm_model
+# VLM_LLM_BINDING_HOST=https://api.example.com/v1
+# VLM_LLM_BINDING_API_KEY=your_vlm_api_key
+# MAX_ASYNC_VLM_LLM=4
+# LLM_TIMEOUT_VLM_LLM=180
+
 #######################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
@@ -377,7 +466,8 @@ EMBEDDING_SEND_DIM=false
 # # EMBEDDING_MODEL=gemini-embedding-001
 # # EMBEDDING_DIM=1536
 # # EMBEDDING_TOKEN_LIMIT=2048
-# # EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com
+### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
+# # EMBEDDING_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
 # # EMBEDDING_BINDING_API_KEY=your_api_key
 ### Gemini embedding requires sending dimension to server
 # # EMBEDDING_SEND_DIM=true
@@ -397,6 +487,8 @@ OLLAMA_EMBEDDING_NUM_CTX=8192
 ### Bedrock uses AWS credentials from the environment / AWS credential chain.
 ### It does not use EMBEDDING_BINDING_API_KEY.
 # # EMBEDDING_BINDING=aws_bedrock
+# # EMBEDDING_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+# # Or set EMBEDDING_BINDING_HOST to a custom Bedrock-compatible proxy/gateway URL
 # # EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
 # # EMBEDDING_DIM=1024
 # AWS_ACCESS_KEY_ID=your_aws_access_key_id

diff --git a/examples/lightrag_gemini_workspace_demo.py b/examples/lightrag_gemini_workspace_demo.py
@@ -9,12 +9,11 @@
   which ensures that Knowledge Graphs, Vector Databases, and Chunks are
   stored in separate, non-conflicting directories.
 - Independent Configuration: Different workspaces can utilize different
-  ENTITY_TYPES and document sets simultaneously.
+  entity type guidance and document sets simultaneously.
 
 Prerequisites:
 1. Set the following environment variables:
    - GEMINI_API_KEY: Your Google Gemini API key.
-   - ENTITY_TYPES: A JSON string of entity categories (e.g., '["Person", "Organization"]').
 2. Ensure your data directory contains:
    - Data/book-small.txt
    - Data/HR_policies.txt
@@ -25,12 +24,10 @@
 
 import os
 import asyncio
-import json
 import numpy as np
 from lightrag import LightRAG, QueryParam
 from lightrag.llm.gemini import gemini_model_complete, gemini_embed
 from lightrag.utils import wrap_embedding_func_with_attrs
-from lightrag.constants import DEFAULT_ENTITY_TYPES
 
 
 async def llm_model_func(
@@ -59,25 +56,14 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
 
 async def initialize_rag(
     workspace: str = "default_workspace",
-    entities=None,
 ) -> LightRAG:
     """
     Initializes a LightRAG instance with data isolation.
 
-    - entities (if provided) overrides everything
-    - else ENTITY_TYPES env var is used
-    - else DEFAULT_ENTITY_TYPES is used
+    Entity type guidance can be customized by passing
+    addon_params={'entity_types_guidance': '...'} to LightRAG.
     """
 
-    if entities is not None:
-        entity_types = entities
-    else:
-        env_entities = os.getenv("ENTITY_TYPES")
-        if env_entities:
-            entity_types = json.loads(env_entities)
-        else:
-            entity_types = DEFAULT_ENTITY_TYPES
-
     rag = LightRAG(
         workspace=workspace,
         llm_model_name="gemini-2.0-flash",
@@ -86,7 +72,6 @@ async def initialize_rag(
         embedding_func_max_async=4,
         embedding_batch_num=8,
         llm_model_max_async=2,
-        addon_params={"entity_types": entity_types},
     )
 
     await rag.initialize_storages()

diff --git a/lightrag/_version.py b/lightrag/_version.py
@@ -1,4 +1,4 @@
 """Lightweight version definitions shared by packaging and runtime code."""
 
-__version__ = "1.4.16"
+__version__ = "1.5.0"
 __api_version__ = "0288"