### This is a sample .env file
### Server Configuration
HOST=0.0.0.0
PORT=9191
WEBUI_TITLE='My Graph KB'
WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
OLLAMA_EMULATING_MODEL_TAG=latest
# WORKERS=2 # 并发处理的文档数
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
### Login Configuration
# AUTH_ACCOUNTS='admin:admin123,user1:pass456'
# TOKEN_SECRET=Your-Key-For-LightRAG-API-Server
# TOKEN_EXPIRE_HOURS=48
# GUEST_TOKEN_EXPIRE_HOURS=24
# JWT_ALGORITHM=HS256
### API-Key to access LightRAG Server API
# LIGHTRAG_API_KEY=your-secure-api-key-here
# WHITELIST_PATHS=/health,/api/*
### Optional SSL Configuration
# SSL=true
# SSL_CERTFILE=/path/to/cert.pem
# SSL_KEYFILE=/path/to/key.pem
### Directory Configuration (defaults to current working directory)
### Should not be set when deploying with Docker (set by the Dockerfile instead of .env)
### Default values are ./inputs and ./rag_storage
# INPUT_DIR=<absolute_path_for_doc_input_dir>
# WORKING_DIR=<absolute_path_for_working_dir>
### Max nodes returned from graph retrieval
# MAX_GRAPH_NODES=1000
### Logging level
# LOG_LEVEL=INFO
# VERBOSE=False
# LOG_MAX_BYTES=10485760
# LOG_BACKUP_COUNT=5
### Logfile location (defaults to current working directory)
# LOG_DIR=/path/to/log/directory
### Settings for RAG query
# HISTORY_TURNS=3
# COSINE_THRESHOLD=0.2
# TOP_K=60
# MAX_TOKEN_TEXT_CHUNK=4000
# MAX_TOKEN_RELATION_DESC=4000
# MAX_TOKEN_ENTITY_DESC=4000
### Entity and relation summarization configuration
### Language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=Chinese
### Number of duplicated entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=6
### Max tokens for entity/relations description after merge
# MAX_TOKEN_SUMMARY=500
### Number of documents processed in parallel (less than MAX_ASYNC/2 is recommended)
# MAX_PARALLEL_INSERT=2
### Chunk size for document splitting, 500~1500 is recommended
CHUNK_SIZE=1200
CHUNK_OVERLAP_SIZE=100
### LLM Configuration
ENABLE_LLM_CACHE=true
ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Time out in seconds for LLM, None for infinite timeout
TIMEOUT=None
### Some models like o1-mini require temperature to be set to 1
TEMPERATURE=0
### Max concurrency requests of LLM
MAX_ASYNC=4
### MAX_TOKENS: max tokens sent to LLM for entity relation summaries (less than context size of the model)
### MAX_TOKENS: set as num_ctx option for Ollama by API Server
# MAX_TOKENS=16384
MAX_TOKENS=4096
## LLM Binding type: openai, ollama, lollms
LLM_BINDING=openai
LLM_MODEL=glm-4-long
LLM_BINDING_HOST=https://open.bigmodel.cn/api/paas/v4
LLM_BINDING_API_KEY=your_api_key
### Embedding Configuration
### Embedding Binding type: openai, ollama, lollms
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=bge-m3
EMBEDDING_DIM=1024
EMBEDDING_BINDING_HOST=http://127.0.0.1:11436/v1
EMBEDDING_BINDING_API_KEY=your_api_key
# If the embedding service is deployed within the same Docker stack, use host.docker.internal instead of localhost
### Number of chunks sent to Embedding in a single request
# EMBEDDING_BATCH_NUM=32
### Max concurrency requests for Embedding
# EMBEDDING_FUNC_MAX_ASYNC=16
### Maximum tokens sent to Embedding for each chunk (no longer in use?)
# MAX_EMBED_TOKENS=8192
### Data storage selection
#LIGHTRAG_KV_STORAGE=PGKVStorage
LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
LIGHTRAG_GRAPH_STORAGE=NebulaStorage
# LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
NEBULA_HOSTS=127.0.0.1:9669
NEBULA_USERNAME=root
NEBULA_PASSWORD=nebula
NEBULA_POOL_SIZE=2
NEBULA_SPACE=chunk_entity_relation
# NEBULA_SPACE=game
### TiDB Configuration (Deprecated)
# TIDB_HOST=localhost
# TIDB_PORT=4000
# TIDB_USER=your_username
# TIDB_PASSWORD='your_password'
# TIDB_DATABASE=your_database
### Separates the data of different LightRAG instances (being deprecated)
# TIDB_WORKSPACE=default
### PostgreSQL Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_USER=your_username
POSTGRES_PASSWORD='your_password'
POSTGRES_DATABASE=your_database
POSTGRES_MAX_CONNECTIONS=12
### Separates the data of different LightRAG instances (being deprecated)
# POSTGRES_WORKSPACE=default
### Neo4j Configuration
### Example of a remote URI: neo4j+s://xxxxxxxx.databases.neo4j.io
NEO4J_URI=neo4j://localhost:7687
NEO4J_USERNAME=neo4j
### Placeholder only: put the real password in your private .env, never in a committed sample
NEO4J_PASSWORD='your_password'
### Independent AGE Configuration (not for AGE embedded in PostgreSQL)
# AGE_POSTGRES_DB=
# AGE_POSTGRES_USER=
# AGE_POSTGRES_PASSWORD=
# AGE_POSTGRES_HOST=
# AGE_POSTGRES_PORT=8529
# AGE graph name (applies to PostgreSQL and independent AGE)
### AGE_GRAPH_NAME is deprecated
# AGE_GRAPH_NAME=lightrag
### MongoDB Configuration
MONGO_URI=mongodb://root:root@localhost:27017/
MONGO_DATABASE=LightRAG
### Separates the data of different LightRAG instances (being deprecated)
# MONGODB_GRAPH=false
### Milvus Configuration
MILVUS_URI=http://localhost:19530
MILVUS_DB_NAME=lightrag
# MILVUS_USER=root
# MILVUS_PASSWORD=your_password
# MILVUS_TOKEN=your_token
### Qdrant
QDRANT_URL=http://localhost:16333
# QDRANT_API_KEY=your-api-key
### Redis
REDIS_URI=redis://localhost:6379
赛题题目:面向GraphRAG的操作系统级优化
优化结果
以原始LightRAG为Baseline,在其RAG抽取效率、图算法性能、图谱构造速度性能三个方面进行优化和测试。下面是测试结果。
问题理解
动态存储架构优化
问题是什么:构建索引会消耗大量Token(也即GraphRAG“昂贵”的原因),导致知识图谱的存储成本高——更新索引的时间成本、存储数据的空间成本。
问题产生的原因:知识图谱的动态更新处理不善。通过考虑最理想的情况:“知识图谱有且只有与问答有关的信息,没有任何无用的信息”,可以分析出问题的本质在于高效利用LLM,将Token花在“刀刃”上。
问题的解决方案:
LLM调用资源占用及速度优化
问题是什么:RAG提取知识图谱时所调用的LLM会占用大量GPU显存,且生成速度较慢;需要解决的是如何降低显存占用并提高生成速度。
问题产生的原因:在高并发、长文本的RAG请求场景下,使用vLLM等启用了KV cache的推理加速框架时,KV cache很容易被全部占满,导致LLM生成吞吐量急剧下降。
问题的解决方案:
QuickStart
下面是.env文件示例: