Skip to main content
Article
vector-database · semantic-search · hybrid-search · rag · python · docker · auto-vectorization

Build a Semantic Search Engine with Weaviate

Launch a local Weaviate instance using Docker. Use the Python client to define a schema, import data with auto-vectorization, and perform semantic searches. Get from zero to a working vector search application in minutes.

beginner · 15 min · 4 steps
The play
  1. Launch Weaviate with Docker
    The fastest way to run Weaviate is with Docker. Create a `docker-compose.yml` file with this content. It starts Weaviate together with the `text2vec-transformers` module, which vectorizes text automatically, so no API key is needed. Run `docker-compose up -d` in your terminal to start it.
  2. Connect and Define a Schema
    Install the Python client (`pip install weaviate-client`). Then connect to your local Weaviate instance and define a collection (called a 'class' in older Weaviate versions), which acts as the schema. The collection definition tells Weaviate what properties your data objects have and how they should be vectorized.
  3. Ingest Data with Auto-Vectorization
    Get a handle to your new collection and add data objects. Weaviate's `text2vec-transformers` module automatically reads each object's text properties (except those marked with `skip_vectorization`) and generates a vector embedding for the object upon import.
  4. Run a Vector Search
    Perform a semantic search using `near_text`. This finds objects whose content is conceptually closest to your query. Weaviate handles vectorizing your query and finding the most similar stored objects.
Starter code
import weaviate
import weaviate.classes.config as wc
import sys

# Name of the Weaviate collection that the demo deletes (if present),
# recreates, and then queries.
CLASS_NAME = "Question"

def run_weaviate_demo() -> None:
    """Create, populate, and query a demo Weaviate collection end to end.

    Connects to a local Weaviate instance, recreates the ``CLASS_NAME``
    collection with the ``text2vec-transformers`` vectorizer, inserts four
    sample question/answer objects, and prints the ``question`` property of
    up to two objects returned by a ``near_text`` search.

    Any exception is reported on stderr together with a hint to start
    Weaviate; it is not re-raised. If the batch insert reports errors they
    are printed to stderr and the search step is skipped. The connection is
    closed on every path on which it was opened.
    """
    # Bound before the ``try`` so the ``finally`` clause can tell whether a
    # client was ever created without introspecting ``locals()``.
    client = None
    try:
        # Connect to the local Weaviate instance set up by Docker
        client = weaviate.connect_to_local()
        print("Successfully connected to Weaviate.")

        # 1. Define Schema
        # Start from a clean slate so repeated runs do not accumulate
        # duplicate objects in a previously created collection.
        if client.collections.exists(CLASS_NAME):
            print(f"Deleting existing '{CLASS_NAME}' collection.")
            client.collections.delete(CLASS_NAME)

        print(f"Creating new '{CLASS_NAME}' collection...")
        # NOTE(review): newer weaviate-client releases appear to deprecate
        # ``vectorizer_config`` in favour of ``vector_config`` -- confirm
        # against the installed client version before changing it.
        client.collections.create(
            name=CLASS_NAME,
            vectorizer_config=wc.Configure.Vectorizer.text2vec_transformers(),
            properties=[
                wc.Property(name="question", data_type=wc.DataType.TEXT),
                # The answer text is stored but excluded from the embedding.
                wc.Property(
                    name="answer",
                    data_type=wc.DataType.TEXT,
                    skip_vectorization=True,
                ),
            ],
        )
        print("Schema created.")

        # 2. Import Data
        questions_collection = client.collections.get(CLASS_NAME)
        data_to_import = [
            {"question": "What is the capital of France?", "answer": "Paris"},
            {"question": "What is the tallest mountain?", "answer": "Mount Everest"},
            {"question": "Who wrote 'To Kill a Mockingbird'?", "answer": "Harper Lee"},
            {"question": "Which nation has the most people?", "answer": "India"},
        ]

        print(f"Importing {len(data_to_import)} data objects...")
        result = questions_collection.data.insert_many(data_to_import)
        if result.has_errors:
            # Report the failures and stop; ``finally`` still closes the client.
            print(f"Import errors: {result.errors}", file=sys.stderr)
            return
        print("Data imported successfully.")

        # 3. Perform Vector Search
        query = "geographical locations"
        print(f"\nPerforming vector search for: '{query}'")
        response = questions_collection.query.near_text(
            query=query,
            limit=2,
        )

        print("Search results:")
        for item in response.objects:
            print(f"  - Question: {item.properties['question']}")

    except Exception as e:
        # Top-level boundary of the demo script: report and fall through to
        # cleanup instead of surfacing a traceback to the reader.
        print(f"An error occurred: {e}", file=sys.stderr)
        print("\nPlease ensure Weaviate is running via `docker-compose up -d`.", file=sys.stderr)

    finally:
        if client is not None and client.is_connected():
            client.close()
            print("\nConnection to Weaviate closed.")

if __name__ == "__main__":
    # Prerequisites for running the demo:
    # 1. Have Docker installed and running.
    # 2. Save the docker-compose.yml from Step 1.
    # 3. Run `docker-compose up -d` in the same directory.
    # 4. Run `pip install weaviate-client`.
    run_weaviate_demo()
Build a Semantic Search Engine with Weaviate — Action Pack