Advanced Usage Example

This example shows advanced features of langchain-memvid, including direct use of the individual components (Encoder, IndexManager, VideoProcessor, Retriever) and custom video and QR code configurations.

# Generated by ipykernel_memvid_extension from %dump in advanced.ipynb. DO NOT EDIT.

# LangChain MemVid Advanced Usage Guide
# This notebook demonstrates the advanced features and components of the LangChain MemVid library, showing how to work
# with individual components for more fine-grained control.

# Setup and Component Imports
# We'll import the core components that allow us to work with the system at a lower level:
# Encoder for converting text to video
# IndexConfig and IndexManager for managing the vector index
# QRCodeConfig and VideoConfig for customizing the storage format
# VideoProcessor for direct video manipulation
# Retriever for searching stored data

from langchain_huggingface import HuggingFaceEmbeddings
from pathlib import Path
from langchain_memvid import (
    Encoder,
    IndexConfig,
    IndexManager,
    QRCodeConfig,
    VectorStoreConfig,
    VideoConfig,
    Retriever
)
from langchain_memvid.video import VideoProcessor

# Setting Up the Index
# First, we'll create and configure the vector index:
# Create an index configuration with FAISS backend
# Initialize the embedding model
# Set up the index manager
# Add sample texts with metadata to the index

# Create index configuration
config = IndexConfig(
    index_type="faiss",
    metric="cosine",
    nlist=6  # Number of clusters for the IVF index (kept small; we only index six texts)
)
# Initialize embeddings (uses the default sentence-transformers model)
embeddings = HuggingFaceEmbeddings()
# Create index manager
index_manager = IndexManager(config=config, embeddings=embeddings)
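# Optional sanity check (illustrative): embed_query is part of the standard
# LangChain embeddings interface, so we can probe the vector dimensionality
# the index will be created with.
dim = len(embeddings.embed_query("dimension probe"))
print(f"Embedding dimension: {dim}")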
# Example text chunks
texts = [
    "The quick brown fox jumps over the lazy dog",
    "A fast orange fox leaps across a sleepy canine",
    "The weather is beautiful today",
    "It's raining cats and dogs outside",
    "Python is a popular programming language",
    "JavaScript is widely used for web development"
]
# Example metadata for each text
metadata = [
    {"id": 0, "source": "example1.txt", "category": "animals"},
    {"id": 1, "source": "example1.txt", "category": "animals"},
    {"id": 2, "source": "example2.txt", "category": "weather"},
    {"id": 3, "source": "example2.txt", "category": "weather"},
    {"id": 4, "source": "example3.txt", "category": "programming"},
    {"id": 5, "source": "example3.txt", "category": "programming"}
]
# Add texts with metadata
# The index will be created automatically with the correct dimension
# and trained if using an IVF index
index_manager.add_texts(texts, metadata)

# Testing Initial Search Functionality
# Let's verify our index is working by performing some test searches. This demonstrates the basic search functionality
# before we encode the data into video format.

# Example searches
queries = [
    "Tell me about foxes",
    "What's the weather like?",
    "What programming languages are mentioned?"
]
results = [
    {
        "query": query,
        "text": result.text,
        "source": result.source,
        "category": result.category,
        "similarity": f"{result.similarity:.4f}"
    }
    for query in queries
    for result in index_manager.search_text(query, k=2)
]
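# Inspect the search results (illustrative; plain Python, no library-specific API):
for r in results:
    print(f"{r['query']!r} -> {r['text']!r} "
          f"(category={r['category']}, similarity={r['similarity']})")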

# Video Processing Setup
# Now we'll set up the video processing components:
# Configure video parameters (resolution, FPS, codec)
# Configure QR code generation parameters
# Create a test video with our data
# Verify we can decode the data back from the video

video_config = VideoConfig(
    fps=30,
    resolution=(1920, 1080),
    codec="mp4v",
)
qrcode_config = QRCodeConfig(
    error_correction="H",
    box_size=10,
    border=4
)
video_processor = VideoProcessor(
    video_config=video_config,
    qrcode_config=qrcode_config
)
# Create a test video
data = [
    "The quick brown fox jumps over the lazy dog",
    "A fast orange fox leaps across a sleepy canine",
    "The weather is beautiful today",
    "It's raining cats and dogs outside",
    "Python is a popular programming language",
    "JavaScript is widely used for web development"
]
images = [video_processor.create_qr_code(d) for d in data]
output_path = Path("test_video.mp4")
# Encode the QR code images into a video
video_processor.encode_video(
    frames=images,
    output_path=output_path
)
frames = video_processor.decode_video(output_path)
decoded_data = []
for frame in frames:
    decoded_data.extend(video_processor.extract_qr_codes(frame))
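# Round-trip sanity check (illustrative). We compare as sets rather than
# lists, since we only assume every chunk survives the encode/decode cycle,
# not that frame order is preserved.
assert set(decoded_data) == set(data), "decoded chunks do not match the originals"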

# Building the Complete System
# Here we combine all components to create a complete vector store system:
# Configure the vector store settings
# Create an encoder with our configurations
# Build the video file and index
# Display statistics about the created storage

cfg = VectorStoreConfig(
    video=video_config,
    qrcode=qrcode_config
)
encoder = Encoder(config=cfg, index_manager=index_manager)
encoder.add_chunks(texts, metadata)
# Reuse the path from above; the full build overwrites the test video
video_file = Path("test_video.mp4")
index_dir = Path("test_index.d")
stats = encoder.build_video(video_file, index_dir)
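# The exact fields of the returned stats object depend on the library
# version, so we simply print it for inspection.
print(stats)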

# Testing the Complete System
# Finally, we'll test the complete system by:
# Creating a retriever that can access both the video and index
# Performing similarity searches
# Verifying that results match our original queries

retriever = Retriever(
    video_file=video_file,
    index_dir=index_dir,
    config=cfg,
    index_manager=index_manager,
    k=2,
)
# Example searches
queries = [
    "Tell me about foxes",
    "What's the weather like?",
    "What programming languages are mentioned?"
]
results = [
    {
        "query": query,
        "text": doc.page_content,
        **{k: v for k, v in doc.metadata.items() if k != "text"}
    }
    for query in queries
    for doc in retriever.retrieve(query)
]
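# Display the retrieved documents alongside their queries (illustrative;
# each dict holds the query, the page content, and the document metadata):
for r in results:
    print(f"{r['query']!r} -> {r['text']!r}")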

The complete example demonstrates:

  • Working with individual components (Encoder, IndexManager, etc.)

  • Custom video and QR code configurations

  • Direct video processing and manipulation

  • Building complete systems from components

  • Testing and verification of functionality

For the interactive Jupyter notebook version, see Jupyter Notebook Examples.