Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
99 commits
Select commit Hold shift + click to select a range
c7e1647
Setup: Dependencies and JRS scripts for RAGAnywhere
johnshearing Dec 27, 2025
299f76a
Fixed merge conflicts in .gitignore and uv.lock
johnshearing Dec 27, 2025
44a218a
Add README for jrs folder
johnshearing Dec 27, 2025
e1e99d1
Reorganize docs into jrs/_notes and update setup.sh
johnshearing Dec 27, 2025
560bed0
Checkpoint: Save my work before syncing
johnshearing Dec 27, 2025
6cb3c55
Checkpoint: Save my work before syncing
johnshearing Dec 27, 2025
f992e80
Fix: handled 2x vector count mismatch in EmbeddingFunc
johnshearing Dec 28, 2025
450258c
Add archive directory to tracking
johnshearing Dec 28, 2025
784db41
split the example index/query script in two
johnshearing Dec 29, 2025
efbf7eb
added image query script
johnshearing Dec 29, 2025
cb5a697
image query loops for each query mode
johnshearing Dec 30, 2025
4b569e9
looping for multiple text query modes
johnshearing Dec 30, 2025
e336e9f
deleted the readme file
johnshearing Dec 30, 2025
20aaa2c
testing multimodal queries
johnshearing Jan 2, 2026
3cb34d9
auto refresh WebUI after merge operation
johnshearing Jan 8, 2026
61e76ed
auto update WebUI after merging entities
johnshearing Jan 8, 2026
e663e9d
updated documentation
johnshearing Jan 8, 2026
90c3e38
Merge remote-tracking branch 'upstream/main'
johnshearing Jan 8, 2026
66e7f7d
final update before starting work on WebUI
johnshearing Jan 11, 2026
6d378a5
Start of building Data Sanitation Utiltiy into WebUI
johnshearing Jan 16, 2026
404fd15
Merge upstream/main and fix conflicts
johnshearing Jan 16, 2026
22b5fc7
Final sync: Integrated upstream changes & updated lock files
johnshearing Jan 16, 2026
decb887
Sanitize Data screen
johnshearing Jan 16, 2026
4d0708f
Arrange controls on Sanitiz Data screen
johnshearing Jan 16, 2026
614c5ca
Updates to SanitizeData.tsx
johnshearing Jan 17, 2026
7938036
Arranging controls on SanitizeData.tsx
johnshearing Jan 17, 2026
279c636
Final arrangment of controls on SanitizeData.tsx
johnshearing Jan 17, 2026
033c819
SanitizeData.tsx: fetch entities and filter results
johnshearing Jan 17, 2026
e294696
coded page controls for SanitizeData.tsx
johnshearing Jan 17, 2026
f235a8e
spinner applied to SanitizeData.tsx for page navigation
johnshearing Jan 17, 2026
2fe6c0e
records displayed change with size of SanitizeData.tsx
johnshearing Jan 17, 2026
4352410
fixed checkbox bug on SanitizeData.tsx
johnshearing Jan 18, 2026
62f1c6c
debugged Show All Button on SanitizeData.tsx
johnshearing Jan 19, 2026
146cf2d
coded the Clear Sel. button on SanitizeData.tsx
johnshearing Jan 19, 2026
805625c
added logic for Target Entity contol on SanitizeData.tsx
johnshearing Jan 19, 2026
bda8b5b
populate description window with data on SanitizeData.tsx
johnshearing Jan 19, 2026
e54b298
Format data on description window: SanitizeData.tsx
johnshearing Jan 20, 2026
aa56356
Removed Show Description button: SanitizeData.tsx
johnshearing Jan 20, 2026
c3c91f3
Added save functionality to description modal dialog SanitizeData.tsx
johnshearing Jan 20, 2026
624c2e4
Building Relationship modal dialog SanitizeData.tsx
johnshearing Jan 21, 2026
c9801e3
Added save to relationship modal dialog SanitizeData.tsx
johnshearing Jan 24, 2026
bd4be39
Added delete func to edit windows: SanitizeData.tsx
johnshearing Jan 24, 2026
1cb4a24
Added Control to select entity type: SanitizeData.tsx
johnshearing Jan 25, 2026
1025dbc
Added modal window for selecting entity types:SanitizeData.tsx
johnshearing Jan 26, 2026
fc4a40d
Orphans button, Fix Select Entity Type Modal:SanitizeData.tsx
johnshearing Jan 27, 2026
7068013
All Of Type button: SanitizeData.tsx
johnshearing Jan 28, 2026
75d4346
Added Orphans button: SanitizeData.tsx
johnshearing Jan 29, 2026
ffc45f6
Logic to enable/disable merge controls: SanitizeData.tsx
johnshearing Jan 29, 2026
86606ae
Enable/disable logic for UI main buttons: SanitizeData.tsx
johnshearing Jan 29, 2026
51f31a5
Added modal window for creating entities: SanitizeData.tsx
johnshearing Jan 30, 2026
21e1c44
Improved error messaging: SanitizeData.tsx
johnshearing Jan 30, 2026
42814ec
Dynamic loading of entity types: SanitizeData.tsx
johnshearing Jan 31, 2026
042a18f
More efficient fetching logic: SanitizeData.tsx
johnshearing Jan 31, 2026
ac8dfd6
Added Entity edit screen: SanitizeData.tsx
johnshearing Feb 1, 2026
cf591df
Added entity delete function: SanitizeData.tsx
johnshearing Feb 1, 2026
0b6e132
Added Create Relation Function: SanitizeData.tsx
johnshearing Feb 1, 2026
6183112
Added Merge Entities functionality: SanitizeData.tsx
johnshearing Feb 1, 2026
3cf3df4
Removed depricated controls: SanitizeData.tsx
johnshearing Feb 1, 2026
7c371cb
Fixed breaking changes in dependencies: ChatMessage.tsx
johnshearing Feb 2, 2026
91f5ef3
Rearranged controls for better UX: SanitizeData.tsx
johnshearing Feb 2, 2026
ea854b7
Updated notes for syncing with upstream
johnshearing Feb 2, 2026
07f9480
Merge remote-tracking branch 'upstream/main'
johnshearing Feb 2, 2026
f990f0d
Started work on tab order: SanitizeData.tsx
johnshearing Feb 3, 2026
1cff67d
Assigned tab order to controls: SanitizeData.tsx
johnshearing Feb 3, 2026
41aeaf2
Added hotkeys: SanitizeData.tsx
johnshearing Feb 3, 2026
e879638
Fixed Formatting and Linting
johnshearing Feb 3, 2026
10ca83d
Final formatting sweep
johnshearing Feb 3, 2026
bf4c58f
Fix linting and formating in various files
johnshearing Feb 3, 2026
70fbb42
Fix linting and formatting errors detected by CI
johnshearing Feb 4, 2026
6aeb3cf
Update logic for Reset All button: SanitizeData.tsx
johnshearing Feb 4, 2026
c71c4aa
Improved merge logic: SanitizeData.tsx
johnshearing Feb 4, 2026
ed0c0b5
Added video tutorial button: SanitizeData.tsx
johnshearing Feb 5, 2026
8411ef1
Removed place holders: SanitizeData.tsx
johnshearing Feb 6, 2026
e668e05
Tightened related entity filter: SanitizeData.tsx
johnshearing Feb 7, 2026
890d762
Removed place holders: SanitizeData.tsx
johnshearing Feb 7, 2026
982a709
Changed two controls from disabled to read only: SanitizeData.tsx
johnshearing Feb 8, 2026
d6055cb
Fixed stale cache when editing relationships: SanitizeData.tsx
johnshearing Feb 8, 2026
b2bd996
Fixed stale cache when editing entities: SanitizeData.tsx
johnshearing Feb 8, 2026
e35be46
Click anywhere on row to select checkbox: SanitizeData.tsx
johnshearing Feb 8, 2026
0e7d521
Focus on filter when starting: SanitizeData.tsx
johnshearing Feb 8, 2026
de7a7ff
Adjusted tab order for better UX: SanitizeData.tsx
johnshearing Feb 9, 2026
41952c3
Tab order and other improvements to UX: SanitizeData.tsx
johnshearing Feb 10, 2026
cec6080
Fixed hotkey issue: SanitizeData.tsx
johnshearing Feb 10, 2026
ff7336e
Fixed issues with Select Entity Type dialog: SanitizeData.tsx
johnshearing Feb 11, 2026
988fafd
Improved UX for selecting entity types: SanitizeData.tsx
johnshearing Feb 11, 2026
a3a4e04
Workflow, Tab orders, Hotkeys, UX: SanitizeData.tsx
johnshearing Feb 11, 2026
d662fcf
Much simpler UI: SanitizeData.tsx
johnshearing Feb 12, 2026
38a2685
Simplified User Interface: SanitizeData.tsx
johnshearing Feb 13, 2026
02aeeda
Merge remote-tracking branch 'upstream/main'
johnshearing Feb 13, 2026
1828f4d
WebUI: Remove chunks from context when Chunk Top K set to 0
johnshearing Feb 15, 2026
63edf24
Now can add relationships in batches: SanitizeData.tsx
johnshearing Feb 16, 2026
9750239
Fixed edge-case save failure: SanitizeData.tsx
johnshearing Feb 17, 2026
9367ddb
Esc key for batch processing window: SanitizeData.tsx
johnshearing Feb 18, 2026
ac0f744
Added progress spinners to batch processes: SanitizeData.tsx
johnshearing Feb 23, 2026
56e9178
Added progress spinners to batch processes: SanitizeData.tsx
johnshearing Feb 23, 2026
1fc008a
changed from local host to generic access: SanitizeData.tsx
johnshearing Feb 23, 2026
db7a884
Updated to Pull Request #2731
johnshearing Mar 18, 2026
5e51df7
Checkpoint: Save WebUI modifications and built assets
johnshearing Mar 24, 2026
8801f68
Merge remote-tracking branch 'upstream/main'
johnshearing Mar 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,30 @@ download_models_hf.py
memory-bank
.claude/CLAUDE.md
.claude/

# Claude Code
CLAUDE.md

# Google Jules
.jules/

# My changes to the gitignore file
# 1. Ignore everything in the repository
/*

# 2. Allow the 'jrs' folder itself
!/jrs/

# 3. Ignore everything inside 'jrs' (including the 'work' folder)
/jrs/*

# 4. Allow specific files directly inside 'jrs'
!/jrs/*.*

# 5. Allow the _notes folder specifically
!/jrs/_notes/

# 6. Allow everything inside the _notes folder
!/jrs/_notes/**

# 7. Allow the archive folder specifically
!/jrs/archive/

# 8. Allow everything inside the archive folder
!/jrs/archive/**
144 changes: 144 additions & 0 deletions jrs/_0_interesting_queries.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
## Ways To Query The LightRAG Server

## Query 1

### LightRAG server will produce links which will bring you to the exact spot in a video where the query is addressed.

The magic which makes this possible is in the way the data was prepared and indexed.
[This library](https://github.com/johnshearing/scrape_yt_mk_transcripts) was used to prepare the data for indexing.
<br>
How the query is written also matters. The following is an example.

**Paste the following query into the LightRAG server prompt box.
Try Naive, Local, Global, Hybrid, and Mix mode for each query**


[The source text is a transcript from the source video. The URL for the source video is found in the metadata associated with the source text. Time stamps in the source text appear between brackets at the beginning of each line as shown in the following sample text: <sample text begins here>[6.56 > 11.4] (Roger) the different years that in those that had COVID there were higher levels of oxidative stress.<sample text ends here> In the sample text, the values given between the brackets are given in seconds and decimal fractions of a second. So the first value in the timestamp sample is six seconds and 56 hundreths of a second. Only use the integer part of the value and only return the first value in the pair. So from the sample text, only use the number 6 to represent six seconds into the video when generating your answer. To say it another way, only use the whole number part of the first value given between the brackets when forming your answer. Please provide URLs in your responses for source video that incorporate the timestamps found in the source text so that the videos can be viewed at the moment which is relevant to the query response. The following is an example of how the URLs should be structured if the source text is from Ln3WszTq0uA.txt and the moment of interest has a time stamp of [6.56 > 11.68]: https://www.youtube.com/watch?v=Ln3WszTq0uA&t=6s. To build the URL always start with the URL you find in the metadata which is associated with the source text. In this case the URL associated with the source text is "https://www.youtube.com/watch?v=Ln3WszTq0uA". Append the characters &t= to the URL. So now the URL will look as follows: "https://www.youtube.com/watch?v=Ln3WszTq0uA&t=". Next, append the number 6 to the URL if the timestamp is [6.56 > 11.4] because we are only interested in the whole number part of the first value in the timestamp. So now the URL will look as follows: "https://www.youtube.com/watch?v=Ln3WszTq0uA&t=6". 
Finally, append the letter s to the URL to denote seconds. So the final resulting URL will look as follows: "https://www.youtube.com/watch?v=Ln3WszTq0uA&t=6s"] Please search for all instances where infrared light is discussed with regard to studies done in Brazil.

**The following was the response produced**

Infrared Light Studies in Brazil
In the context of infrared light and its potential benefits for COVID-19, there is a notable study mentioned that was conducted in Brazil. This study involved the use of a jacket for patients with mild to moderate COVID-19 who were hospitalized but not on ventilators. The findings indicated that the use of near-infrared light therapy significantly improved several pulmonary functions and reduced hospitalization time by four days. Specifically, there was a 32% improvement in maximum inspiratory pressure and a 23% improvement in maximum expiratory pressure.

The researchers highlighted the systemic effects of photobiomodulation, which suggests that near-infrared light could have broader implications for treating conditions like COVID-19. However, the study's main limitation was the small cohort size of only 30 patients, suggesting that further research with larger populations is necessary to strengthen the conclusions.

For more information on this topic, you can view the relevant section of the video discussing the Brazilian study at this timestamp: https://www.youtube.com/watch?v=Hy8G5TKxiLY&t=2102s.

### Summary
The discussion around topical sunscreen highlights a critical view of its use in preventing skin cancer, suggesting that blocking UV light with sunscreen may lead to adverse health outcomes, including an increased risk of melanoma.

---
#### The following explains the effect of the query formatting in the above example

* Query methods available are naive, local, global, hybrid, mix, and bypass.
* naive only looks in the vector database for information but will give thoughtful answers beyond what it finds. So it's not just searching for words. Rather it is searching for meaning and reporting on that.
* local uses information found in the knowledge graph nodes.
* global uses information found in knowledge graph relations.
* hybrid combines local and global methods.
* mix combines hybrid with naive
* The results vary significantly based on the method used.
* There is no telling which will give the best result so it is good to try them all.
* The python script _2_index_query_??.py does indeed try all the methods so it might be wise to run the query in the script first to see which method produces the best response and then run the query again in the LightRAG server to get the best formatting.

* [Time stamps in the...]
* Specify special instructions or formatting instructions in brackets before the query. This greatly improves the quality of the response.


## Query 2

### Metadata: LightRAG server will list data about the source material and produce links to that source material.

The magic which makes this possible is in the way the data was prepared and indexed.
[This library](https://github.com/johnshearing/scrape_yt_mk_transcripts) was used to prepare the data for indexing.
<br>
How the query is written also matters. The following is an example.

**Paste the following query into the LightRAG server prompt box.**

/naive Please list the metadata for the source material.

**The following was the response produced**

### Metadata for the Source Material
- **Video Title:** Dr Jack Kruse: Light, Melanin, POMC in Human Evolution & Disease
- **Video URL:** [Watch here](https://www.youtube.com/watch?v=Ln3WszTq0uA)
- **Video Platform:** YouTube
- **Video Channel:** Max Gulhane MD
- **Posted Date:** April 2, 2023
- **Language:** English

### References
- [DC] Ln3WszTq0uA_metadata.json

## Query 3

### LightRAG server will produce diagrams.

How the query is written matters.
The query type is specified first, for example: /mix
Special instructions about the output go in between brackets []
Finally the query is written.
The following is an example.

**Paste the following query into the LightRAG server prompt box.**

>/mix [Use mermaid format for diagrams] Please draw a character relationship diagram for Age of Mammals

**The following was the response produced**

```mermaid
graph TD;
A[Age of Mammals] -->|Describes| B[Max]
A -->|Emergence after| C[The Last Extinction Event]
A -->|Links to| D[Darwin's Theory of Evolution]
A -->|Impacted by| E[Asteroid Event]
A -->|Related to| F[Human Evolution]
A -->|Significant for| G[Dinosaurs]
```

### Explanation:
- The diagram illustrates the relationships surrounding the "Age of Mammals," showing its connections to key entities such as Max, the Last Extinction Event, Darwin's Theory of Evolution, the Asteroid Event, Human Evolution, and Dinosaurs. Each relationship highlights how these entities are interconnected in the context of evolutionary history.

## Query 4

### LightRAG can find orphans in the knowledge graph.

This only works in a local or mixed mode query.
And currently this will only work for 23 items in the list when using LR-Server.
When this same query is used in a Python query script, many more list items can be used in the query.
I am trying to discover what is making that difference.
The query type is specified first: /local
Special instructions about the output go in between brackets []
Finally the query is written.

The following is an example.
**Paste the following query into the LightRAG server prompt box.**

>/local [Only provide the name of the entity in the response. Nothing else is required.] Please examine the Entity ID for all Entities in the following python list. Then please return only the Entities with a Rank of 0. [ "2023-04-02T06:06:17Z", "2023-04-22T23:01:27Z", "Alabama", "Albury", "Biological Compartments", "Cellular Health", "Circadian Biology", "Circadian Health", "Circadian Rhythm", "Cold Thermogenesis Protocol", "Cytochrome C Oxidase", "Delta Airlines", "Deuterium Depleted Water", "Dr Jack Kruse: Light, Melanin, POMC in Human Evolution & Disease", "Dr Jack Kruse: WATER, non-native EMFs & mitochondrial basis of disease | Regenerative Health Podcast", "Dr. Anthony Chafee", "Dr. Jack Kruse", "Dr. Max Gulhane", "Eureka Moment", "Farm Tour", "Health Optimization"]

**The following was the response produced**

>The only Entity from your provided list with a Rank of 0 is:
>
>Albury
>This entity corresponds to the location in New South Wales where Dr. Max Gulhane is based.

## Query 5

### LightRAG can find Type, Description, Rank, and File Path in the knowledge graph. In the previous example we searched from Entity to Entity ID to Rank in order to find orphans. In the following example we go through Entity to Entity ID to Type in order to return categories.

This only works in a mixed mode query.
The query type is specified first: /mix
Special instructions about the output go in between brackets []
Finally the query is written.

The following is an example.
**Paste the following query into the LightRAG server prompt box.**

>/mix [Only provide the name of the entity in the response. Nothing else is required.] Please examine the Entity ID for all Entities in the following python list. Then please return only the Entities with a Type of Person. [ "2023-04-02T06:06:17Z", "2023-04-22T23:01:27Z", "Alabama", "Albury", "Biological Compartments", "Cellular Health", "Circadian Biology", "Circadian Health", "Circadian Rhythm", "Cold Thermogenesis Protocol", "Cytochrome C Oxidase", "Delta Airlines", "Deuterium Depleted Water", "Dr Jack Kruse: Light, Melanin, POMC in Human Evolution & Disease", "Dr Jack Kruse: WATER, non-native EMFs & mitochondrial basis of disease | Regenerative Health Podcast", "Dr. Anthony Chafee", "Dr. Jack Kruse", "Dr. Max Gulhane", "Eureka Moment", "Farm Tour", "Health Optimization"]

**The following was the response produced**

>Dr. Anthony Chafee
Dr. Jack Kruse
Dr. Max Gulhane
170 changes: 170 additions & 0 deletions jrs/_1_custom_index_01.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import os
import asyncio
import logging
import logging.config
import json
import numpy as np
from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import logger, set_verbose_debug, EmbeddingFunc
from llama_index.embeddings.openai import OpenAIEmbedding

# import textract

# Configuration
# Directory handed to LightRAG as working_dir; main() also looks here for
# kv_store_doc_status.json. Hard-coded absolute path for one machine.
WORKING_DIR = "/home/js/LightRAG/jrs/work/seheult/_seheult_work_dir"
# Embedding model name; overridable through the EMBEDDING_MODEL env var.
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-large")
# Embedding vector size; overridable through the EMBEDDING_DIM env var.
# NOTE(review): presumably must agree with what the chosen model can emit — confirm.
EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", 3072))
# Key passed to the embedding client; None when EMBEDDING_BINDING_API_KEY is unset.
API_KEY = os.getenv("EMBEDDING_BINDING_API_KEY")
# Passed to EmbeddingFunc as max_token_size; overridable through MAX_TOKEN_SIZE.
MAX_TOKEN_SIZE = int(os.getenv("MAX_TOKEN_SIZE", 8192))

# Files to be indexed
# Each path is read with json.load() in main() and the parsed object is
# passed to rag.ainsert_custom_kg().
files_2b_indexed = [
"/home/js/LightRAG/jrs/work/seheult/seheult_metadata/_bNySyEobfY_metadata.json",
"/home/js/LightRAG/jrs/work/seheult/seheult_metadata/0m1Qekrfs7w_metadata.json",
]


def configure_logging():
    """Configure logging with console and rotating file handlers.

    Removes every handler and filter from the ``uvicorn``,
    ``uvicorn.access``, ``uvicorn.error`` and ``lightrag`` loggers, then
    installs a stderr handler and a rotating file handler on the
    ``lightrag`` logger at INFO level.

    Environment variables read:
        LOG_DIR: directory for ``lightrag_index.log`` (default: current
            working directory).
        LOG_MAX_BYTES: rotation threshold in bytes (default: 10485760).
        LOG_BACKUP_COUNT: number of rotated files kept (default: 5).
        VERBOSE_DEBUG: ``"true"`` (case-insensitive, the default) enables
            verbose debug via ``set_verbose_debug``.
    """
    # Start from a clean slate: detach whatever was attached earlier.
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
        logger_instance = logging.getLogger(logger_name)
        logger_instance.handlers = []
        logger_instance.filters = []

    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_index.log"))
    print(f"\nLightRAG index log file: {log_file_path}\n")

    # Create the directory that actually holds the log file. Using
    # dirname(log_dir) would only create its parent, and would raise for a
    # bare relative LOG_DIR such as "logs" (dirname is then "").
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {"format": "%(levelname)s: %(message)s"},
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    # Keep lightrag records out of the root logger's handlers.
                    "propagate": False,
                },
            },
        }
    )
    logger.setLevel(logging.INFO)
    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "true").lower() == "true")


# Make sure the LightRAG working directory exists before it is used.
# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory between the check and the call.
os.makedirs(WORKING_DIR, exist_ok=True)


async def initialize_rag():
    """Create a LightRAG instance wired to an OpenAI embedding model.

    Builds the embedder from EMBEDDING_MODEL / API_KEY / EMBEDDING_DIM,
    wraps it in an EmbeddingFunc, constructs LightRAG on WORKING_DIR with
    gpt_4o_mini_complete as the LLM function, then initializes storages and
    the pipeline status.

    Returns:
        The LightRAG instance, after both initialization calls have been
        awaited.
    """
    print("Initializing LightRAG for indexing...")

    openai_embedder = OpenAIEmbedding(
        model=EMBEDDING_MODEL,
        api_key=API_KEY,
        dimensions=EMBEDDING_DIM,
    )

    async def embed_batch(texts):
        # llama-index hands back a list; LightRAG is given a numpy array.
        vectors = await openai_embedder.aget_text_embedding_batch(texts)
        return np.array(vectors)

    rag = LightRAG(
        working_dir=WORKING_DIR,
        embedding_func=EmbeddingFunc(
            embedding_dim=EMBEDDING_DIM,
            max_token_size=MAX_TOKEN_SIZE,
            func=embed_batch,
        ),
        llm_model_func=gpt_4o_mini_complete,
    )

    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag


async def main():
    """Index every file in ``files_2b_indexed`` that is not yet indexed.

    Steps:
      1. Require OPENAI_API_KEY or EMBEDDING_BINDING_API_KEY to be set.
      2. Initialize LightRAG via ``initialize_rag()``.
      3. Read ``kv_store_doc_status.json`` from WORKING_DIR (if present) and
         collect the ``file_path`` of every entry whose status is
         ``"processed"``.
      4. For each remaining path that exists on disk, load it as JSON and
         pass it to ``rag.ainsert_custom_kg`` with the file's base name as
         ``full_doc_id``.

    Any exception is printed with its traceback instead of propagating;
    storages are finalized in all cases once LightRAG was created.
    """
    rag = None
    try:
        if not os.getenv("OPENAI_API_KEY") and not API_KEY:
            raise ValueError(
                "OPENAI_API_KEY or EMBEDDING_BINDING_API_KEY environment variable not set"
            )
        rag = await initialize_rag()

        # Check which files are already indexed
        indexed_files = set()
        doc_status_file = os.path.join(WORKING_DIR, "kv_store_doc_status.json")
        if os.path.exists(doc_status_file):
            # JSON text is UTF-8 by specification; do not depend on the
            # platform's locale encoding when decoding it.
            with open(doc_status_file, "r", encoding="utf-8") as f:
                docs = json.load(f)
            indexed_files = {
                doc["file_path"]
                for doc in docs.values()
                if doc.get("status") == "processed" and "file_path" in doc
            }
            print(f"Already indexed files: {indexed_files}")

        # Index new documents
        for doc_path in files_2b_indexed:
            if doc_path in indexed_files:
                print(f"Skipping already indexed file: {doc_path}")
                continue
            print(f"Checking document at: {doc_path}")
            if not os.path.exists(doc_path):
                print(f"Document file not found at: {doc_path}, skipping...")
                continue
            print(f"Indexing document: {doc_path}...")

            with open(doc_path, "r", encoding="utf-8") as f:
                docs = json.load(f)

            await rag.ainsert_custom_kg(docs, full_doc_id=os.path.basename(doc_path))
            print(f"Indexed {doc_path}")
    except Exception as e:
        # Top-level boundary of the script: report and fall through to cleanup.
        print(f"An error occurred: {e}")
        import traceback

        traceback.print_exc()
    finally:
        # rag stays None when initialization itself failed.
        if rag:
            print("Finalizing storages...")
            await rag.finalize_storages()


if __name__ == "__main__":
    # Script entry point: set up logging, run the async indexing job to
    # completion, then print the closing message.
    configure_logging()
    indexing_job = main()
    asyncio.run(indexing_job)
    print("\nIndexing Done!")
Loading