hybrid-search-implementation

Combine vector and keyword search for improved retrieval in RAG systems and search engines. Provides four fusion methods: Reciprocal Rank Fusion (RRF) for general use, linear combination for tunable balance, cross-encoder reranking for highest quality, and cascade filtering for efficiency. Includes production-ready templates for PostgreSQL with pgvector, Elasticsearch with dense vectors, and custom Python pipelines with parallel search execution. Handles score normalization, metadata filtering, and result deduplication across multiple search backends. Supports reranking with cross-encoders and offers practical guidance on tuning weights empirically rather than assuming fixed configurations.

INSTALLATION
npx skills add https://github.com/wshobson/agents --skill hybrid-search-implementation
Run in your project or agent environment. Adjust flags if your CLI version differs.

SKILL.md

Hybrid Search Implementation

Patterns for combining vector similarity and keyword-based search.

When to Use This Skill

  • Building RAG systems with improved recall
  • Combining semantic understanding with exact matching
  • Handling queries with specific terms (names, codes)
  • Improving search for domain-specific vocabulary
  • When pure vector search misses keyword matches

Core Concepts

1. Hybrid Search Architecture

Query → ┬─► Vector Search ──► Candidates ─┐

        │                                  │

        └─► Keyword Search ─► Candidates ─┴─► Fusion ─► Results

2. Fusion Methods

| Method        | Description              | Best For        |
|---------------|--------------------------|-----------------|
| RRF           | Reciprocal Rank Fusion   | General purpose |
| Linear        | Weighted sum of scores   | Tunable balance |
| Cross-encoder | Rerank with neural model | Highest quality |
| Cascade       | Filter then rerank       | Efficiency      |

Templates

Template 1: Reciprocal Rank Fusion

from typing import List, Dict, Tuple

from collections import defaultdict

def reciprocal_rank_fusion(
    result_lists: List[List[Tuple[str, float]]],
    k: int = 60,
    weights: List[float] = None
) -> List[Tuple[str, float]]:
    """
    Combine multiple ranked lists using Reciprocal Rank Fusion (RRF).

    Every document receives ``weight / (k + rank)`` from each list it appears
    in, where ``rank`` is its 1-based position in that list, and the
    contributions are summed. The raw scores inside the input tuples are
    ignored; only the ordering of each list matters.

    Args:
        result_lists: One ranked list of (doc_id, score) tuples per search
            method, best match first.
        k: RRF constant. Larger values flatten the curve, so lower-ranked
            documents contribute relatively more.
        weights: Optional weight per result list. Defaults to 1.0 for each.

    Returns:
        Fused ranking as (doc_id, fused_score) tuples, highest score first.

    Raises:
        ValueError: If ``weights`` is given and its length differs from the
            number of result lists.
    """
    if weights is None:
        weights = [1.0] * len(result_lists)
    elif len(weights) != len(result_lists):
        # zip() below would otherwise silently drop the unmatched entries.
        raise ValueError(
            f"expected {len(result_lists)} weights, got {len(weights)}"
        )

    scores = defaultdict(float)
    for result_list, weight in zip(result_lists, weights):
        # Rank is 1-based so the best hit contributes weight / (k + 1).
        for rank, (doc_id, _) in enumerate(result_list, start=1):
            scores[doc_id] += weight * (1.0 / (k + rank))

    # Sort by fused score, best first.
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)

def linear_combination(
    vector_results: List[Tuple[str, float]],
    keyword_results: List[Tuple[str, float]],
    alpha: float = 0.5
) -> List[Tuple[str, float]]:
    """
    Combine results with linear interpolation of min-max normalized scores.

    Each result list is rescaled to [0, 1] independently, then every document
    is scored as ``alpha * vector + (1 - alpha) * keyword``. A document that
    is missing from one list contributes 0 for that list.

    Args:
        vector_results: (doc_id, similarity_score) from vector search
        keyword_results: (doc_id, bm25_score) from keyword search
        alpha: Weight for vector search (1-alpha for keyword)

    Returns:
        (doc_id, combined_score) tuples, highest score first. Ties keep the
        order in which documents first appear (vector results, then keyword
        results).
    """

    def normalize(results):
        """Min-max scale the scores of one result list into [0, 1]."""
        if not results:
            return {}
        raw = [score for _, score in results]
        low, high = min(raw), max(raw)
        if high == low:
            # All hits scored the same (e.g. a single hit). Give them full
            # credit; mapping them to 0 would make a retrieved document look
            # identical to one that was never returned.
            return {doc_id: 1.0 for doc_id, _ in results}
        span = high - low
        return {doc_id: (score - low) / span for doc_id, score in results}

    vector_scores = normalize(vector_results)
    keyword_scores = normalize(keyword_results)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output order of tied documents is deterministic.
    all_docs = dict.fromkeys([*vector_scores, *keyword_scores])

    combined = {
        doc_id: alpha * vector_scores.get(doc_id, 0)
        + (1 - alpha) * keyword_scores.get(doc_id, 0)
        for doc_id in all_docs
    }

    return sorted(combined.items(), key=lambda item: item[1], reverse=True)

Template 2: PostgreSQL Hybrid Search

import asyncpg

from typing import List, Dict, Optional

import numpy as np

class PostgresHybridSearch:
    """Hybrid search with pgvector and full-text search."""

    # Cross-encoder checkpoint used by search_with_rerank().
    _RERANK_MODEL_NAME = 'cross-encoder/ms-marco-MiniLM-L-6-v2'

    def __init__(self, pool: "asyncpg.Pool"):
        """Store the connection pool that every query acquires from."""
        self.pool = pool
        # Loaded lazily on the first rerank call and then reused.
        self._cross_encoder = None

    async def setup_schema(self):
        """Create the pgvector extension, the documents table and its indexes."""
        async with self.pool.acquire() as conn:
            await conn.execute("""
                CREATE EXTENSION IF NOT EXISTS vector;

                CREATE TABLE IF NOT EXISTS documents (
                    id TEXT PRIMARY KEY,
                    content TEXT NOT NULL,
                    embedding vector(1536),
                    metadata JSONB DEFAULT '{}',
                    ts_content tsvector GENERATED ALWAYS AS (
                        to_tsvector('english', content)
                    ) STORED
                );

                -- Vector index (HNSW)
                CREATE INDEX IF NOT EXISTS documents_embedding_idx
                ON documents USING hnsw (embedding vector_cosine_ops);

                -- Full-text index (GIN)
                CREATE INDEX IF NOT EXISTS documents_fts_idx
                ON documents USING gin (ts_content);
            """)

    async def hybrid_search(
        self,
        query: str,
        query_embedding: List[float],
        limit: int = 10,
        vector_weight: float = 0.5,
        filter_metadata: Optional[Dict] = None
    ) -> List[Dict]:
        """
        Perform hybrid search combining vector and full-text.

        Each sub-search fetches ``limit * 3`` candidates; the two candidate
        sets are joined and ordered by a weighted RRF score (constant 60).

        Args:
            query: Free-text query, parsed with ``websearch_to_tsquery``.
            query_embedding: Query vector compared with ``<=>``.
            limit: Number of fused rows to return.
            vector_weight: Weight of the vector rank term; the keyword rank
                term gets ``1 - vector_weight``.
            filter_metadata: Optional ``{key: value}`` pairs; every pair must
                equal the text form of ``metadata->>key``.

        Returns:
            One dict per row with ``id``, ``content``, ``metadata``,
            ``vector_score``, ``keyword_score`` and ``rrf_score``.
        """
        # Fixed placeholders come first so their positions never depend on
        # how many metadata filters follow:
        #   $1 embedding, $2 query text, $3 per-search candidate limit,
        #   $4 vector weight, $5 final row limit.
        # NOTE(review): binding a Python list to $1::vector presumably relies
        # on a pgvector codec registered on the pool's connections - confirm.
        params = [query_embedding, query, limit * 3, vector_weight, limit]

        conditions = ["1=1"]
        for key, value in (filter_metadata or {}).items():
            # Bind both the key and the value; the key is never spliced into
            # the SQL text, which closes the injection hole.
            params.append(key)
            key_pos = len(params)
            params.append(value)
            value_pos = len(params)
            conditions.append(
                f"metadata->>${key_pos}::text = ${value_pos}::text"
            )
        where_clause = " AND ".join(conditions)

        async with self.pool.acquire() as conn:
            results = await conn.fetch(f"""
                WITH vector_search AS (
                    SELECT
                        id,
                        content,
                        metadata,
                        ROW_NUMBER() OVER (ORDER BY embedding <=> $1::vector) as vector_rank,
                        1 - (embedding <=> $1::vector) as vector_score
                    FROM documents
                    WHERE {where_clause}
                    ORDER BY embedding <=> $1::vector
                    LIMIT $3
                ),
                keyword_search AS (
                    SELECT
                        id,
                        content,
                        metadata,
                        ROW_NUMBER() OVER (ORDER BY ts_rank(ts_content, websearch_to_tsquery('english', $2)) DESC) as keyword_rank,
                        ts_rank(ts_content, websearch_to_tsquery('english', $2)) as keyword_score
                    FROM documents
                    WHERE ts_content @@ websearch_to_tsquery('english', $2)
                      AND {where_clause}
                    ORDER BY ts_rank(ts_content, websearch_to_tsquery('english', $2)) DESC
                    LIMIT $3
                )
                SELECT
                    COALESCE(v.id, k.id) as id,
                    COALESCE(v.content, k.content) as content,
                    COALESCE(v.metadata, k.metadata) as metadata,
                    v.vector_score,
                    k.keyword_score,
                    -- RRF fusion (ROW_NUMBER is 1-based)
                    COALESCE(1.0 / (60 + v.vector_rank), 0) * $4::float +
                    COALESCE(1.0 / (60 + k.keyword_rank), 0) * (1 - $4::float) as rrf_score
                FROM vector_search v
                FULL OUTER JOIN keyword_search k ON v.id = k.id
                ORDER BY rrf_score DESC
                LIMIT $5
            """, *params)

            return [dict(row) for row in results]

    def _get_cross_encoder(self):
        """Return the cross-encoder, loading it on first use only."""
        model = getattr(self, "_cross_encoder", None)
        if model is None:
            # Imported lazily so the dependency is only needed for reranking.
            from sentence_transformers import CrossEncoder

            model = CrossEncoder(self._RERANK_MODEL_NAME)
            self._cross_encoder = model
        return model

    async def search_with_rerank(
        self,
        query: str,
        query_embedding: List[float],
        limit: int = 10,
        rerank_candidates: int = 50
    ) -> List[Dict]:
        """
        Hybrid search with cross-encoder reranking.

        Fetches ``rerank_candidates`` rows via hybrid_search(), scores each
        (query, content) pair with the cross-encoder, stores the result under
        ``rerank_score`` and returns the ``limit`` best rows.
        """
        # Get candidates
        candidates = await self.hybrid_search(
            query, query_embedding, limit=rerank_candidates
        )
        if not candidates:
            return []

        # Rerank with cross-encoder. The model is cached on the instance so
        # it is not reloaded on every call.
        # NOTE(review): predict() is a synchronous call inside a coroutine.
        model = self._get_cross_encoder()
        pairs = [(query, candidate["content"]) for candidate in candidates]
        scores = model.predict(pairs)

        for candidate, score in zip(candidates, scores):
            candidate["rerank_score"] = float(score)

        # Sort by rerank score and return top results
        reranked = sorted(
            candidates, key=lambda row: row["rerank_score"], reverse=True
        )
        return reranked[:limit]

Template 3: Elasticsearch Hybrid Search

from elasticsearch import Elasticsearch

from typing import List, Dict, Optional

class ElasticsearchHybridSearch:
    """Hybrid search with Elasticsearch and dense vectors."""

    def __init__(
        self,
        es_client: "Elasticsearch",
        index_name: str = "documents"
    ):
        """Keep the client and the name of the index every call targets."""
        self.es = es_client
        self.index_name = index_name

    def create_index(self, vector_dims: int = 1536):
        """
        Create index with dense vector and text fields.

        Args:
            vector_dims: Dimensionality of the ``embedding`` field.
        """
        mapping = {
            "mappings": {
                "properties": {
                    "content": {
                        "type": "text",
                        "analyzer": "english"
                    },
                    "embedding": {
                        "type": "dense_vector",
                        "dims": vector_dims,
                        "index": True,
                        "similarity": "cosine"
                    },
                    "metadata": {
                        "type": "object",
                        "enabled": True
                    }
                }
            }
        }

        # ignore=400 is passed so an HTTP 400 from the create call does not
        # raise (presumably the "index already exists" case).
        # NOTE(review): the ``body``/``ignore`` keywords depend on the
        # installed client version - confirm.
        self.es.indices.create(index=self.index_name, body=mapping, ignore=400)

    @staticmethod
    def _format_hits(response) -> List[Dict]:
        """Flatten the hits of a search response into plain result dicts."""
        return [
            {
                "id": hit["_id"],
                "content": hit["_source"]["content"],
                "metadata": hit["_source"].get("metadata", {}),
                "score": hit["_score"]
            }
            for hit in response["hits"]["hits"]
        ]

    def hybrid_search(
        self,
        query: str,
        query_embedding: List[float],
        limit: int = 10,
        boost_vector: float = 1.0,
        boost_text: float = 1.0,
        filter: Optional[Dict] = None
    ) -> List[Dict]:
        """
        Hybrid search that sums a vector score and a text match score.

        The query is a ``bool``/``should`` of a ``script_score`` clause over
        ``match_all`` (cosine similarity against ``embedding``) and a
        ``match`` clause on ``content``.

        Args:
            query: Text matched against the ``content`` field.
            query_embedding: Query vector passed to ``cosineSimilarity``.
            limit: Number of hits to return.
            boost_vector: Multiplier for the vector score; keep it
                non-negative so the script never yields a negative score.
            boost_text: Boost applied to the text ``match`` clause.
            filter: Optional query clause placed in ``bool.filter``.

        Returns:
            Dicts with ``id``, ``content``, ``metadata`` and ``score``.
        """
        # The boost is a script parameter, so the script source stays
        # constant across calls. Shifting the similarity by +1.0 *before*
        # scaling keeps the score >= 0 for any non-negative boost; the old
        # ``cos * boost + 1.0`` went negative for boost > 1. With the default
        # boost of 1.0 the score is unchanged.
        vector_clause = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": "(cosineSimilarity(params.query_vector, 'embedding') + 1.0) * params.boost",
                    "params": {
                        "query_vector": query_embedding,
                        "boost": boost_vector
                    }
                }
            }
        }

        # Text search on the analyzed content field.
        text_clause = {
            "match": {
                "content": {
                    "query": query,
                    "boost": boost_text
                }
            }
        }

        search_body = {
            "size": limit,
            "query": {
                "bool": {
                    "should": [vector_clause, text_clause],
                    "minimum_should_match": 1
                }
            }
        }

        # Add filter if provided
        if filter:
            search_body["query"]["bool"]["filter"] = filter

        response = self.es.search(index=self.index_name, body=search_body)
        return self._format_hits(response)

    def hybrid_search_rrf(
        self,
        query: str,
        query_embedding: List[float],
        limit: int = 10,
        window_size: int = 100
    ) -> List[Dict]:
        """
        Hybrid search using Elasticsearch 8.x RRF.

        Sends a text ``match`` sub-search and a ``knn`` sub-search and lets
        the server fuse them with ``rank.rrf`` (rank constant 60).

        Args:
            query: Text matched against the ``content`` field.
            query_embedding: Query vector for the ``knn`` sub-search.
            limit: Number of hits to return.
            window_size: RRF window size; also used as ``k`` for the kNN
                sub-search, with ``num_candidates`` set to twice that.

        Returns:
            Dicts with ``id``, ``content``, ``metadata`` and ``score``.
        """
        # NOTE(review): the request shape (``sub_searches`` / ``rank``) is
        # server-version specific - confirm against the target cluster.
        search_body = {
            "size": limit,
            "sub_searches": [
                {
                    "query": {
                        "match": {
                            "content": query
                        }
                    }
                },
                {
                    "query": {
                        "knn": {
                            "field": "embedding",
                            "query_vector": query_embedding,
                            "k": window_size,
                            "num_candidates": window_size * 2
                        }
                    }
                }
            ],
            "rank": {
                "rrf": {
                    "window_size": window_size,
                    "rank_constant": 60
                }
            }
        }

        response = self.es.search(index=self.index_name, body=search_body)
        return self._format_hits(response)

Template 4: Custom Hybrid RAG Pipeline

from typing import List, Dict, Optional, Callable

from dataclasses import dataclass

@dataclass
class SearchResult:
    """One retrieved document together with its score and origin."""

    # Document identifier; fusion uses it to match hits across searches.
    id: str
    # Document text; this is what the reranker is given.
    content: str
    # Relevance score; its scale depends on the stage that produced it.
    score: float
    source: str  # "vector", "keyword", "hybrid"
    # Optional metadata passed through from the backing store.
    metadata: Optional[Dict] = None

class HybridRAGPipeline:
    """
    Complete hybrid search pipeline for RAG.

    Runs a vector search and a keyword search concurrently, fuses the two
    result lists (RRF or linear interpolation) and optionally reranks the
    fused candidates.
    """

    def __init__(
        self,
        vector_store,
        keyword_store,
        embedder,
        reranker=None,
        fusion_method: str = "rrf",
        vector_weight: float = 0.5
    ):
        """
        Args:
            vector_store: Object with an awaitable
                ``search(embedding, limit, filter)``.
            keyword_store: Object with an awaitable
                ``search(query, limit, filter)``.
            embedder: Object with a synchronous ``embed(query)``.
            reranker: Optional object with ``predict(pairs)``; reranking is
                skipped when it is None.
            fusion_method: ``"rrf"`` for Reciprocal Rank Fusion; any other
                value selects linear fusion.
            vector_weight: Weight of the vector score in linear fusion (the
                keyword score gets ``1 - vector_weight``). Not used by RRF.
        """
        self.vector_store = vector_store
        self.keyword_store = keyword_store
        self.embedder = embedder
        self.reranker = reranker
        self.fusion_method = fusion_method
        self.vector_weight = vector_weight

    async def search(
        self,
        query: str,
        top_k: int = 10,
        filter: Optional[Dict] = None,
        use_rerank: bool = True
    ) -> List[SearchResult]:
        """
        Execute hybrid search pipeline.

        Args:
            query: User query text.
            top_k: Number of results to return.
            filter: Optional filter forwarded unchanged to both stores.
            use_rerank: Rerank the fused candidates when a reranker is set.

        Returns:
            At most ``top_k`` results, best first.
        """
        # Imported here because the surrounding module does not import it.
        import asyncio

        # Step 1: Get query embedding
        query_embedding = self.embedder.embed(query)

        # Step 2: Execute parallel searches, over-fetching 3x so fusion has
        # candidates beyond the final cut-off.
        vector_results, keyword_results = await asyncio.gather(
            self._vector_search(query_embedding, top_k * 3, filter),
            self._keyword_search(query, top_k * 3, filter)
        )

        # Step 3: Fuse results
        if self.fusion_method == "rrf":
            fused = self._rrf_fusion(vector_results, keyword_results)
        else:
            fused = self._linear_fusion(vector_results, keyword_results)

        # Step 4: Rerank if enabled (only the top 2 * top_k candidates)
        if use_rerank and self.reranker:
            fused = await self._rerank(query, fused[:top_k * 2])

        return fused[:top_k]

    async def _vector_search(
        self,
        embedding: List[float],
        limit: int,
        filter: Optional[Dict]
    ) -> List[SearchResult]:
        """Query the vector store and wrap each row as a "vector" result."""
        results = await self.vector_store.search(embedding, limit, filter)
        return [
            SearchResult(
                id=r["id"],
                content=r["content"],
                score=r["score"],
                source="vector",
                metadata=r.get("metadata")
            )
            for r in results
        ]

    async def _keyword_search(
        self,
        query: str,
        limit: int,
        filter: Optional[Dict]
    ) -> List[SearchResult]:
        """Query the keyword store and wrap each row as a "keyword" result."""
        results = await self.keyword_store.search(query, limit, filter)
        return [
            SearchResult(
                id=r["id"],
                content=r["content"],
                score=r["score"],
                source="keyword",
                metadata=r.get("metadata")
            )
            for r in results
        ]

    def _rrf_fusion(
        self,
        vector_results: List[SearchResult],
        keyword_results: List[SearchResult]
    ) -> List[SearchResult]:
        """
        Fuse with RRF.

        Each list contributes ``1 / (60 + rank)`` per document, with a
        1-based rank. Content and metadata come from the vector hit when a
        document appears in both lists.
        """
        k = 60
        scores = {}
        content_map = {}

        for rank, result in enumerate(vector_results):
            scores[result.id] = scores.get(result.id, 0) + 1 / (k + rank + 1)
            content_map[result.id] = result

        for rank, result in enumerate(keyword_results):
            scores[result.id] = scores.get(result.id, 0) + 1 / (k + rank + 1)
            if result.id not in content_map:
                content_map[result.id] = result

        sorted_ids = sorted(scores.keys(), key=lambda x: scores[x], reverse=True)

        return [
            SearchResult(
                id=doc_id,
                content=content_map[doc_id].content,
                score=scores[doc_id],
                source="hybrid",
                metadata=content_map[doc_id].metadata
            )
            for doc_id in sorted_ids
        ]

    @staticmethod
    def _normalized_scores(results: List[SearchResult]) -> Dict[str, float]:
        """Min-max scale the scores of one result list into [0, 1] by id."""
        if not results:
            return {}
        raw = [r.score for r in results]
        low, high = min(raw), max(raw)
        if high == low:
            # Identical scores (e.g. a single hit) get full credit rather
            # than 0, which would equal "not retrieved".
            return {r.id: 1.0 for r in results}
        span = high - low
        return {r.id: (r.score - low) / span for r in results}

    def _linear_fusion(
        self,
        vector_results: List[SearchResult],
        keyword_results: List[SearchResult]
    ) -> List[SearchResult]:
        """
        Fuse with a weighted sum of min-max normalized scores.

        Each document scores ``vector_weight * vector + (1 - vector_weight)
        * keyword``; a document missing from one list contributes 0 for it.
        Content and metadata come from the vector hit when a document appears
        in both lists.
        """
        vector_scores = self._normalized_scores(vector_results)
        keyword_scores = self._normalized_scores(keyword_results)

        # First occurrence wins, so vector hits take precedence.
        first_hit = {}
        for result in (*vector_results, *keyword_results):
            first_hit.setdefault(result.id, result)

        alpha = self.vector_weight
        fused = [
            SearchResult(
                id=doc_id,
                content=hit.content,
                score=alpha * vector_scores.get(doc_id, 0.0)
                + (1 - alpha) * keyword_scores.get(doc_id, 0.0),
                source="hybrid",
                metadata=hit.metadata
            )
            for doc_id, hit in first_hit.items()
        ]
        fused.sort(key=lambda r: r.score, reverse=True)
        return fused

    async def _rerank(
        self,
        query: str,
        results: List[SearchResult]
    ) -> List[SearchResult]:
        """
        Rerank with cross-encoder.

        Overwrites each result's ``score`` in place with the reranker's score
        for the (query, content) pair and returns the results sorted by it.
        """
        if not results:
            return results

        pairs = [(query, r.content) for r in results]
        scores = self.reranker.predict(pairs)

        for result, score in zip(results, scores):
            result.score = float(score)

        return sorted(results, key=lambda x: x.score, reverse=True)

Best Practices

Do's

  • Tune weights empirically - Test on your data
  • Use RRF for simplicity - Works well without tuning
  • Add reranking - Significant quality improvement
  • Log both scores - Helps with debugging
  • A/B test - Measure real user impact

Don'ts

  • Don't assume one size fits all - Different queries need different weights
  • Don't skip keyword search - Handles exact matches better
  • Don't over-fetch - Balance recall vs latency
  • Don't ignore edge cases - Empty results, single word queries
BrowserAct

Let your agent run on any real-world website

Bypass CAPTCHA & anti-bot for free. Start local, scale to cloud.

Explore BrowserAct Skills →

Stop writing automation & scrapers

Install the CLI. Run your first Skill in 30 seconds. Scale when you're ready.

Start free
free · no credit card