@@ -35,6 +35,7 @@ class LLMType(str, Enum):
35
35
CUSTOM = "custom"
36
36
37
37
class ParserType (str , Enum ):
38
+ TEXT = "text"
38
39
UNSTRUCTURED = "unstructured"
39
40
PYMUPDF = "pymupdf"
40
41
PYPDF = "pypdf"
@@ -54,6 +55,13 @@ class ChunkingStrategy(str, Enum):
54
55
SEMANTIC = "SemanticChunker"
55
56
CUSTOM = "custom"
56
57
58
# Chunking strategies grouped under the "no chunk size" label.
# NOTE(review): presumably these splitters do not take a chunk_size
# argument -- confirm against the code that consumes this list.
NO_CHUNK_SIZE_STRATEGIES = [
    ChunkingStrategy.MARKDOWN,
    ChunkingStrategy.HTML,
    ChunkingStrategy.SEMANTIC,
    ChunkingStrategy.CUSTOM,
]
64
+
57
65
class EmbeddingType (str , Enum ):
58
66
OPENAI = "openai"
59
67
AZURE_OPENAI = "azure_openai"
@@ -123,7 +131,7 @@ def get_class():
123
131
LLM_MAP = {
124
132
LLMType .OPENAI : lazy_load ("langchain_openai" , "ChatOpenAI" ),
125
133
LLMType .AZURE_OPENAI : lazy_load ("langchain_openai" , "AzureChatOpenAI" ),
126
- LLMType .HUGGINGFACE : lazy_load ("langchain_huggingface" , "HuggingFaceHub " ),
134
+ LLMType .HUGGINGFACE : lazy_load ("langchain_huggingface" , "HuggingFaceEndpoint " ),
127
135
LLMType .OLLAMA : lazy_load ("langchain_ollama" , "OllamaChat" ),
128
136
LLMType .COHERE : lazy_load ("langchain_community.llms" , "Cohere" ),
129
137
LLMType .VERTEXAI : lazy_load ("langchain_google_vertexai" , "VertexAI" ),
@@ -132,6 +140,8 @@ def get_class():
132
140
}
133
141
134
142
LOADER_MAP = {
143
+ # TODO: migrate ParserType.UNSTRUCTURED to lazy_load("langchain_unstructured", "UnstructuredLoader"); the active entry below still uses langchain_community's UnstructuredFileLoader.
144
+ ParserType .TEXT : lazy_load ("langchain.document_loaders" , "TextLoader" ),
135
145
ParserType .UNSTRUCTURED : lazy_load ("langchain_community.document_loaders" , "UnstructuredFileLoader" ),
136
146
ParserType .PYMUPDF : lazy_load ("langchain_community.document_loaders" , "PyMuPDFLoader" ),
137
147
ParserType .PYPDF : lazy_load ("langchain_community.document_loaders" , "PyPDFLoader" ),
@@ -163,18 +173,18 @@ def get_class():
163
173
}
164
174
165
175
# Maps each VectorDatabase member to a lazily resolved vector-store class.
# Each entry is (module path, class name) handed to lazy_load; the module path
# must be an importable dotted name (underscores), NOT the pip distribution
# name (hyphens) -- e.g. the "langchain-elasticsearch" distribution is
# imported as "langchain_elasticsearch".
# NOTE(review): assumes lazy_load defers the import until the class is
# requested -- confirm against its definition.
VECTORDB_MAP = {
    VectorDatabase.FAISS: lazy_load("langchain_community.vectorstores", "FAISS"),
    VectorDatabase.CHROMA: lazy_load("langchain_chroma", "Chroma"),
    VectorDatabase.PINECONE: lazy_load("langchain_pinecone", "PineconeVectorStore"),
    VectorDatabase.WEAVIATE: lazy_load("langchain_weaviate.vectorstores", "WeaviateVectorStore"),
    VectorDatabase.QDRANT: lazy_load("langchain_qdrant", "QdrantVectorStore"),
    VectorDatabase.MILVUS: lazy_load("langchain_milvus", "Milvus"),
    VectorDatabase.PGVECTOR: lazy_load("langchain_postgres", "PGVector"),
    VectorDatabase.ELASTICSEARCH: lazy_load("langchain_elasticsearch", "ElasticsearchStore"),
}
175
185
176
186
# Maps each RetrieverType member to a lazily resolved retriever class.
# The first lazy_load argument is an importable dotted module path, so it
# must not contain whitespace.
RETRIEVER_MAP = {
    RetrieverType.BM25: lazy_load("langchain_community.retrievers", "BM25Retriever"),
}
179
189
180
190
RERANKER_MAP = {
@@ -226,6 +236,12 @@ def get_class():
226
236
227
237
# Environment variable requirements for components
228
238
COMPONENT_ENV_REQUIREMENTS = {
239
+ # Unstructured
240
+ ParserType .UNSTRUCTURED : {
241
+ "required" : [],
242
+ "optional" : [],
243
+ "packages" : [_PkgSpec ("langchain-unstructured" )]
244
+ },
229
245
# Embedding Models
230
246
EmbeddingType .AZURE_OPENAI : {
231
247
"required" : ["AZURE_OPENAI_API_KEY" , "AZURE_OPENAI_ENDPOINT" ],
@@ -350,40 +366,59 @@ def get_class():
350
366
VectorDatabase .PINECONE : {
351
367
"required" : ["PINECONE_API_KEY" , "PINECONE_ENVIRONMENT" ],
352
368
"optional" : [],
353
- "packages" : [_PkgSpec ("pinecone-client" , "pinecone" )]
369
+ "packages" : [
370
+ _PkgSpec ("langchain-pinecone" ),
371
+ _PkgSpec ("pinecone-client" , "pinecone" )
372
+ ]
354
373
},
355
374
VectorDatabase .WEAVIATE : {
356
375
"required" : ["WEAVIATE_URL" , "WEAVIATE_API_KEY" ],
357
376
"optional" : [],
358
- "packages" : [_PkgSpec ("weaviate-client" , "weaviate" )]
377
+ "packages" : [
378
+ _PkgSpec ("weaviate-client" , "weaviate" ),
379
+ _PkgSpec ("langchain-weaviate" )
380
+ ]
359
381
},
360
382
VectorDatabase .QDRANT : {
361
383
"required" : ["QDRANT_URL" ],
362
384
"optional" : ["QDRANT_API_KEY" ],
363
- "packages" : [_PkgSpec ("qdrant-client" , "qdrant" )]
385
+ "packages" : [
386
+ _PkgSpec ("qdrant-client" , "qdrant" ),
387
+ _PkgSpec ("langchain-qdrant" )
388
+ ]
364
389
},
365
390
VectorDatabase .MILVUS : {
366
391
"required" : ["MILVUS_HOST" , "MILVUS_PORT" ],
367
392
"optional" : [],
368
- "packages" : [_PkgSpec ("pymilvus" )]
393
+ "packages" : [
394
+ _PkgSpec ("pymilvus" ),
395
+ _PkgSpec ("langchain-milvus" )
396
+ ]
369
397
},
370
398
VectorDatabase .PGVECTOR : {
371
399
"required" : ["PGVECTOR_CONNECTION_STRING" ],
372
400
"optional" : [],
373
401
"packages" : [
402
+ _PkgSpec ("langchain-postgres" ),
374
403
_PkgSpec ("psycopg2-binary" ),
375
404
_PkgSpec ("pgvector" )
376
405
]
377
406
},
378
407
VectorDatabase .ELASTICSEARCH : {
379
408
"required" : ["ELASTICSEARCH_URL" ],
380
409
"optional" : ["ELASTICSEARCH_API_KEY" ],
381
- "packages" : [_PkgSpec ("elasticsearch" )]
410
+ "packages" : [
411
+ _PkgSpec ("elasticsearch" ),
412
+ _PkgSpec ("langchain-elasticsearch" )
413
+ ]
382
414
},
383
415
VectorDatabase .CHROMA : {
384
416
"required" : [],
385
417
"optional" : [],
386
- "packages" : [_PkgSpec ("chromadb" )]
418
+ "packages" : [
419
+ _PkgSpec ("langchain-chroma" ),
420
+ _PkgSpec ("chromadb" )
421
+ ]
387
422
},
388
423
VectorDatabase .FAISS : {
389
424
"required" : [],
0 commit comments