VectorDB Module
The VectorDB module provides connectors and abstractions for vector databases, enabling semantic search and RAG capabilities.
Overview
from openstackai.vectordb import connect
from openstackai.integrations.vector_db import VectorStore, Document
Supported Databases
| Database | Description |
|---|---|
| ChromaDB | Open-source, lightweight |
| Pinecone | Managed cloud service |
| Qdrant | High-performance, Rust-based |
| Weaviate | GraphQL-based, multi-modal |
Quick Start
Connect to Database
from openstackai.vectordb import connect
# ChromaDB (local)
db = connect("chroma", path="./my_db")
# Pinecone (cloud)
db = connect("pinecone", api_key="...", index_name="my-index")
# Qdrant (local or cloud)
db = connect("qdrant", url="http://localhost:6333")
# Weaviate
db = connect("weaviate", url="http://localhost:8080")
Store Documents
# Store text documents
db.add([
"The quick brown fox jumps over the lazy dog.",
"Machine learning is transforming industries.",
"Python is a versatile programming language."
])
# Store with metadata
db.add(
documents=["Document content here..."],
metadatas=[{"source": "wiki", "category": "tech"}],
ids=["doc-001"]
)
Search
# Semantic search
results = db.search("What is machine learning?", n=5)
for doc in results:
print(f"Score: {doc.score:.3f} - {doc.content[:100]}")
With Filters
results = db.search(
query="Python programming",
n=10,
filter={"category": "tech"}
)
Document Class
from openstackai.integrations.vector_db import Document
doc = Document(
content="Document text content",
metadata={"author": "John", "date": "2024-01-15"},
embedding=[0.1, 0.2, ...], # Optional
id="doc-123"
)
Embedding Functions
# Use default OpenAI embeddings
db = connect("chroma", embedding_model="text-embedding-3-small")
# Custom embedding function
def my_embeddings(texts: list[str]) -> list[list[float]]:
# Your embedding logic
return embeddings
db = connect("chroma", embedding_function=my_embeddings)
RAG Integration
from openstackai.easy import rag
# Index documents
index = rag.index("./documents")
# Query with RAG
answer = rag.ask(
index,
"What is the main conclusion?",
n_results=5
)
Operations
Add Documents
db.add(
documents=["text1", "text2"],
ids=["id1", "id2"],
metadatas=[{}, {}]
)
Update Documents
db.update(
ids=["id1"],
documents=["Updated text"],
metadatas=[{"updated": True}]
)
Delete Documents
db.delete(ids=["id1", "id2"])
db.delete(filter={"category": "outdated"})
Count
count = db.count()
print(f"Total documents: {count}")