Skip to main content

ChromaDB

ChromaDB is an open-source embedding database that runs locally, either persisted to disk or entirely in memory.

Installation

pip install openstackai[vectordb]
# or install ChromaDB directly
pip install chromadb

Connection

from openstackai.vectordb import connect

# Persistent storage
db = connect("chroma", path="./chroma_db")

# In-memory (for testing)
db = connect("chroma", persist=False)

# With custom settings
db = connect(
"chroma",
path="./data",
collection_name="my_collection"
)

Configuration

from openstackai.vectordb.chroma import ChromaStore

store = ChromaStore(
path="./chroma_db", # Storage path
collection_name="documents", # Collection name
embedding_model="text-embedding-3-small" # Embedding model
)

Basic Operations

Add Documents

# Simple add
db.add([
"First document text",
"Second document text"
])

# With metadata and IDs
db.add(
documents=["Document content"],
metadatas=[{"source": "web", "date": "2024-01-15"}],
ids=["doc-001"]
)

Search

results = db.search("query text", n=5)

for result in results:
print(f"ID: {result.id}")
print(f"Content: {result.content}")
print(f"Score: {result.score}")
print(f"Metadata: {result.metadata}")

Search with Filters

# Exact match
results = db.search(
"query",
n=10,
filter={"source": "web"}
)

# Multiple conditions
results = db.search(
"query",
filter={
"$and": [
{"source": "web"},
{"date": {"$gte": "2024-01-01"}}
]
}
)

Update

db.update(
ids=["doc-001"],
documents=["Updated content"],
metadatas=[{"updated": True}]
)

Delete

# By ID
db.delete(ids=["doc-001", "doc-002"])

# By filter
db.delete(filter={"source": "outdated"})

# Clear collection
db.delete(all=True)

Collections

# Create new collection
db.create_collection("new_collection")

# List collections
collections = db.list_collections()

# Switch collection
db.use_collection("other_collection")

# Delete collection
db.delete_collection("old_collection")

Embeddings

Default (OpenAI)

db = connect("chroma", embedding_model="text-embedding-3-small")

Custom Embedding Function

def my_embed(texts: list[str]) -> list[list[float]]:
# Your embedding logic
return embeddings

db = connect("chroma", embedding_function=my_embed)

Local Embeddings

from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

db = connect(
"chroma",
embedding_function=lambda texts: model.encode(texts).tolist()
)

See Also