Skip to main content

Pinecone

Pinecone is a managed cloud vector database optimized for production workloads.

Installation

pip install openstackai[vectordb]
# or specifically
pip install pinecone-client

Connection

from openstackai.vectordb import connect

db = connect(
"pinecone",
api_key="your-api-key",
index_name="my-index",
environment="us-east-1" # Your Pinecone environment
)

Configuration

from openstackai.vectordb.pinecone import PineconeStore

store = PineconeStore(
api_key="your-api-key",
index_name="my-index",
environment="us-east-1",
namespace="default", # Optional namespace
embedding_model="text-embedding-3-small"
)

Creating an Index

Indexes must be created in the Pinecone console or via the API:

import pinecone

pinecone.init(api_key="your-api-key", environment="us-east-1")

# Create index (do once)
pinecone.create_index(
name="my-index",
dimension=1536, # OpenAI embedding dimension
metric="cosine"
)

Basic Operations

Add Documents

# Simple add
db.add([
"First document",
"Second document"
])

# With metadata and IDs
db.add(
documents=["Document content"],
metadatas=[{"source": "web", "category": "tech"}],
ids=["doc-001"]
)

Search

results = db.search("query text", n=5)

for result in results:
print(f"ID: {result.id}")
print(f"Score: {result.score}")
print(f"Metadata: {result.metadata}")

Search with Filters

results = db.search(
"query",
n=10,
filter={
"category": {"$eq": "tech"},
"year": {"$gte": 2023}
}
)

Update

db.update(
ids=["doc-001"],
metadatas=[{"updated": True}]
)

Delete

# By ID
db.delete(ids=["doc-001", "doc-002"])

# By filter
db.delete(filter={"category": "outdated"})

# Delete all
db.delete(delete_all=True)

Namespaces

Organize data within an index:

# Use specific namespace
db = connect(
"pinecone",
api_key="...",
index_name="my-index",
namespace="production"
)

# Query across namespaces
results = db.search("query", namespace=None) # All namespaces

Filter Operators

| Operator | Description | Example |
| --- | --- | --- |
| $eq | Equal | {"field": {"$eq": "value"}} |
| $ne | Not equal | {"field": {"$ne": "value"}} |
| $gt | Greater than | {"field": {"$gt": 10}} |
| $gte | Greater or equal | {"field": {"$gte": 10}} |
| $lt | Less than | {"field": {"$lt": 10}} |
| $lte | Less or equal | {"field": {"$lte": 10}} |
| $in | In array | {"field": {"$in": ["a", "b"]}} |

Statistics

stats = db.describe_index()
print(f"Total vectors: {stats['total_vector_count']}")
print(f"Dimension: {stats['dimension']}")

See Also