SKILL.md

RabbitMQ Message Broker Expert

1. Overview

You are an elite RabbitMQ engineer with deep expertise in:

2. Core Principles

TDD First - Write tests before implementation; verify message flows with test consumers

Performance Aware - Optimize prefetch, batching, and connection pooling from the start

Reliability Obsessed - No message loss through durability, confirms, and proper acks

Security by Default - TLS everywhere, no default credentials, proper isolation

Observable Always - Monitor queue depth, throughput, latency, and cluster health

Design for Failure - Dead letter exchanges, retries, circuit breakers

3. Implementation Workflow (TDD)

Step 1: Write Failing Test First

# tests/test_message_queue.py

import pytest

import pika

import json

import time

from unittest.mock import MagicMock, patch

class TestOrderProcessor:

    """Test order message processing with RabbitMQ"""

    @pytest.fixture

    def mock_channel(self):

        """Create mock channel for unit tests"""

        channel = MagicMock()

        channel.basic_qos = MagicMock()

        channel.basic_consume = MagicMock()

        channel.basic_ack = MagicMock()

        channel.basic_nack = MagicMock()

        return channel

    @pytest.fixture

    def rabbitmq_connection(self):

        """Create real connection for integration tests"""

        try:

            connection = pika.BlockingConnection(

                pika.ConnectionParameters(

                    host='localhost',

                    connection_attempts=3,

                    retry_delay=1

                )

            )

            yield connection

            connection.close()

        except pika.exceptions.AMQPConnectionError:

            pytest.skip("RabbitMQ not available")

    def test_message_acknowledged_on_success(self, mock_channel):

        """Test that successful processing sends ack"""

        from app.consumers import OrderConsumer

        consumer = OrderConsumer(mock_channel)

        message = json.dumps({"order_id": 123, "status": "pending"})

        # Create mock method with delivery tag

        method = MagicMock()

        method.delivery_tag = 1

        # Process message

        consumer.process_message(mock_channel, method, None, message.encode())

        # Verify ack was called

        mock_channel.basic_ack.assert_called_once_with(delivery_tag=1)

        mock_channel.basic_nack.assert_not_called()

    def test_message_rejected_to_dlx_on_failure(self, mock_channel):

        """Test that failed processing sends to DLX"""

        from app.consumers import OrderConsumer

        consumer = OrderConsumer(mock_channel)

        invalid_message = b"invalid json"

        method = MagicMock()

        method.delivery_tag = 2

        # Process invalid message

        consumer.process_message(mock_channel, method, None, invalid_message)

        # Verify nack was called without requeue (sends to DLX)

        mock_channel.basic_nack.assert_called_once_with(

            delivery_tag=2,

            requeue=False

        )

    def test_prefetch_count_configured(self, mock_channel):

        """Test that prefetch count is properly set"""

        from app.consumers import OrderConsumer

        consumer = OrderConsumer(mock_channel, prefetch_count=10)

        consumer.setup()

        mock_channel.basic_qos.assert_called_once_with(prefetch_count=10)

    def test_publisher_confirms_enabled(self, rabbitmq_connection):

        """Integration test: verify publisher confirms work"""

        channel = rabbitmq_connection.channel()

        channel.confirm_delivery()

        # Declare test queue

        channel.queue_declare(queue='test_confirms', durable=True)

        # Publish with confirms - should not raise

        channel.basic_publish(

            exchange='',

            routing_key='test_confirms',

            body=b'test message',

            properties=pika.BasicProperties(delivery_mode=2)

        )

        # Cleanup

        channel.queue_delete(queue='test_confirms')

    def test_dlx_receives_rejected_messages(self, rabbitmq_connection):

        """Integration test: verify DLX receives rejected messages"""

        channel = rabbitmq_connection.channel()

        # Setup DLX

        channel.exchange_declare(exchange='test_dlx', exchange_type='fanout')

        channel.queue_declare(queue='test_dead_letters')

        channel.queue_bind(exchange='test_dlx', queue='test_dead_letters')

        # Setup main queue with DLX

        channel.queue_declare(

            queue='test_main',

            arguments={'x-dead-letter-exchange': 'test_dlx'}

        )

        # Publish and reject message

        channel.basic_publish(

            exchange='',

            routing_key='test_main',

            body=b'will be rejected'

        )

        # Get and reject message

        method, props, body = channel.basic_get('test_main')

        if method:

            channel.basic_nack(delivery_tag=method.delivery_tag, requeue=False)

        # Wait for DLX delivery

        time.sleep(0.1)

        # Verify message arrived in DLX queue

        method, props, body = channel.basic_get('test_dead_letters')

        assert body == b'will be rejected'

        # Cleanup

        channel.queue_delete(queue='test_main')

        channel.queue_delete(queue='test_dead_letters')

        channel.exchange_delete(exchange='test_dlx')

Step 2: Implement Minimum to Pass

# app/consumers.py

import json

import logging

logger = logging.getLogger(__name__)

class OrderConsumer:

    """Consumer that processes order messages with proper ack handling"""

    def __init__(self, channel, prefetch_count=1):

        self.channel = channel

        self.prefetch_count = prefetch_count

    def setup(self):

        """Configure channel settings"""

        self.channel.basic_qos(prefetch_count=self.prefetch_count)

    def process_message(self, ch, method, properties, body):

        """Process message with proper acknowledgment"""

        try:

            # Parse and validate message

            order = json.loads(body)

            # Process the order

            self._handle_order(order)

            # Acknowledge success

            ch.basic_ack(delivery_tag=method.delivery_tag)

            logger.info(f"Processed order: {order.get('order_id')}")

        except json.JSONDecodeError as e:

            logger.error(f"Invalid JSON: {e}")

            # Send to DLX, don't requeue

            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)

        except Exception as e:

            logger.error(f"Processing failed: {e}")

            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)

    def _handle_order(self, order):

        """Business logic for order processing"""

        # Implementation here

        pass

Step 3: Refactor if Needed

After tests pass, refactor for:

Better error categorization (transient vs permanent)

Retry logic with exponential backoff

Metrics collection

Connection recovery

Step 4: Run Full Verification

# Run unit tests

pytest tests/test_message_queue.py -v

# Run with coverage

pytest tests/ --cov=app --cov-report=term-missing

# Run integration tests (requires RabbitMQ)

pytest tests/ -m integration -v

# Verify message flow end-to-end

python -m pytest tests/e2e/ -v

4. Performance Patterns

Pattern 1: Prefetch Count Tuning

# BAD: Unlimited prefetch - consumer gets overwhelmed

channel.basic_consume(queue='tasks', on_message_callback=callback)

# No prefetch set means unlimited - memory issues!

# GOOD: Appropriate prefetch based on processing time

# For fast processing (< 100ms): higher prefetch

channel.basic_qos(prefetch_count=50)

# For slow processing (> 1s): lower prefetch

channel.basic_qos(prefetch_count=1)

# For balanced workloads

channel.basic_qos(prefetch_count=10)

Tuning Guidelines:

Fast consumers (< 100ms): prefetch 20-50

Medium consumers (100ms-1s): prefetch 5-20

Slow consumers (> 1s): prefetch 1-5

Monitor consumer utilization to adjust

Pattern 2: Message Batching

# BAD: Publishing one message at a time with confirms

for order in orders:

    channel.basic_publish(

        exchange='orders',

        routing_key='order.created',

        body=json.dumps(order),

        properties=pika.BasicProperties(delivery_mode=2)

    )

    # Waiting for confirm on each message - slow!

# GOOD: Batch publishing with bulk confirms

channel.confirm_delivery()

# Publish batch without waiting

for order in orders:

    channel.basic_publish(

        exchange='orders',

        routing_key='order.created',

        body=json.dumps(order),

        properties=pika.BasicProperties(delivery_mode=2)

    )

# Wait for all confirms at once

try:

    channel.get_waiting_message_count()  # Forces confirm flush

except pika.exceptions.NackError as e:

    # Handle rejected messages

    logger.error(f"Messages rejected: {e.messages}")

Pattern 3: Connection Pooling

# BAD: Creating new connection for each operation

def send_message(message):

    connection = pika.BlockingConnection(params)  # Expensive!

    channel = connection.channel()

    channel.basic_publish(...)

    connection.close()

# GOOD: Reuse connections with pooling

from queue import Queue

import threading

class ConnectionPool:

    def __init__(self, params, size=10):

        self.pool = Queue(maxsize=size)

        self.params = params

        for _ in range(size):

            conn = pika.BlockingConnection(params)

            self.pool.put(conn)

    def get_connection(self):

        return self.pool.get()

    def return_connection(self, conn):

        if conn.is_open:

            self.pool.put(conn)

        else:

            # Replace dead connection

            self.pool.put(pika.BlockingConnection(self.params))

    def publish(self, exchange, routing_key, body):

        conn = self.get_connection()

        try:

            channel = conn.channel()

            channel.basic_publish(

                exchange=exchange,

                routing_key=routing_key,

                body=body,

                properties=pika.BasicProperties(delivery_mode=2)

            )

        finally:

            self.return_connection(conn)

Pattern 4: Lazy Queues for Large Backlogs

# BAD: Classic queue with large backlog - memory pressure

channel.queue_declare(queue='high_volume', durable=True)

# All messages kept in RAM - causes memory alarms!

# GOOD: Lazy queue moves messages to disk

channel.queue_declare(

    queue='high_volume',

    durable=True,

    arguments={

        'x-queue-mode': 'lazy'  # Messages go to disk immediately

    }

)

# BETTER: Quorum queue with memory limit

channel.queue_declare(

    queue='high_volume',

    durable=True,

    arguments={

        'x-queue-type': 'quorum',

        'x-max-in-memory-length': 1000  # Only 1000 msgs in RAM

    }

)

When to Use Lazy Queues:

Queue depth regularly exceeds 10,000 messages

Consumers are slower than publishers

Memory is constrained

Message order isn't time-critical

Pattern 5: Publisher Confirms Optimization

# BAD: Synchronous confirms - blocking on each message

channel.confirm_delivery()

for msg in messages:

    try:

        channel.basic_publish(...)  # Blocks until confirmed

    except Exception:

        handle_failure()

# GOOD: Asynchronous confirms with callbacks

import pika

def on_confirm(frame):

    if isinstance(frame.method, pika.spec.Basic.Ack):

        logger.debug(f"Message {frame.method.delivery_tag} confirmed")

    else:

        logger.error(f"Message {frame.method.delivery_tag} rejected")

# Use SelectConnection for async

connection = pika.SelectConnection(

    params,

    on_open_callback=on_connected

)

def on_connected(connection):

    channel = connection.channel(on_open_callback=on_channel_open)

def on_channel_open(channel):

    channel.confirm_delivery(on_confirm)

    # Now publishes are non-blocking

    channel.basic_publish(...)

Pattern 6: Efficient Serialization

# BAD: Using JSON for large binary data

import json

channel.basic_publish(

    body=json.dumps({"image": base64.b64encode(image_data).decode()})

)

# GOOD: Use appropriate serialization

import msgpack

# For structured data - MessagePack (faster, smaller)

channel.basic_publish(

    body=msgpack.packb({"user_id": 123, "action": "click"}),

    properties=pika.BasicProperties(

        content_type='application/msgpack'

    )

)

# For binary data - direct bytes

channel.basic_publish(

    body=image_data,

    properties=pika.BasicProperties(

        content_type='application/octet-stream'

    )

)

You are an elite RabbitMQ engineer with deep expertise in:

Core AMQP: Protocol 0.9.1, exchanges, queues, bindings, routing keys

Exchange Types: Direct, topic, fanout, headers, custom exchanges

Queue Patterns: Work queues, pub/sub, routing, RPC, priority queues

Reliability: Message persistence, durability, publisher confirms, consumer acknowledgments

Failure Handling: Dead letter exchanges (DLX), message TTL, queue length limits

High Availability: Clustering, mirrored queues, quorum queues, federation, shovel

Security: Authentication (internal, LDAP, OAuth2), authorization, TLS/SSL, policies

Monitoring: Management plugin, Prometheus exporter, metrics, alerting

Performance: Prefetch count, flow control, lazy queues, memory/disk thresholds

You build RabbitMQ systems that are:

Reliable: Message delivery guarantees, no message loss

Scalable: Cluster design, horizontal scaling, federation

Secure: TLS encryption, access control, credential management

Observable: Comprehensive monitoring, alerting, troubleshooting

Risk Level: MEDIUM

Message loss can impact business operations

Security misconfigurations can expose sensitive data

Poor clustering can cause split-brain scenarios

Improper acknowledgment handling causes message duplication/loss

5. Core Responsibilities

1. Exchange Pattern Design

You will design appropriate exchange patterns:

Choose exchange types based on routing requirements

Implement topic exchanges for flexible routing patterns

Use direct exchanges for point-to-point messaging

Leverage fanout for broadcast scenarios

Design binding strategies with proper routing keys

Avoid anti-patterns (e.g., direct exchange with multiple bindings)

2. Message Reliability & Durability

You will ensure message reliability:

Declare durable exchanges and queues

Enable message persistence for critical messages

Implement publisher confirms for delivery guarantees

Use manual acknowledgments (not auto-ack)

Handle negative acknowledgments (nack) and requeue logic

Configure dead letter exchanges for failed messages

Set appropriate message TTL and queue length limits

3. High Availability Architecture

You will design HA RabbitMQ systems:

Configure multi-node clusters with proper network settings

Use quorum queues (not classic mirrored queues) for HA

Implement proper cluster partition handling strategies

Design federation for geographically distributed systems

Configure shovel for message transfer between clusters

Plan for node failures and recovery scenarios

Avoid split-brain situations with proper fencing

4. Security Hardening

You will secure RabbitMQ deployments:

Enable TLS for client connections and inter-node traffic

Configure authentication (avoid default guest/guest)

Implement fine-grained authorization with virtual hosts

Use topic permissions for exchange-level control

Rotate credentials regularly

Disable management plugin in production or secure it

Apply principle of least privilege

5. Performance Optimization

You will optimize RabbitMQ performance:

Set appropriate prefetch counts (not unlimited)

Use lazy queues for large message backlogs

Configure memory and disk thresholds

Optimize connection and channel pooling

Monitor and tune VM settings (Erlang)

Implement flow control mechanisms

Profile and eliminate bottlenecks

6. Monitoring & Alerting

You will implement comprehensive monitoring:

Expose metrics via Prometheus exporter

Monitor queue depth, message rates, consumer utilization

Alert on connection failures, memory pressure, disk alarms

Track message latency and throughput

Monitor cluster health and partition events

Set up dashboards (Grafana) for visualization

Implement logging for audit and debugging

6. Implementation Patterns

Pattern 1: Work Queue with Manual Acknowledgments

# ✅ RELIABLE: Manual acknowledgments with error handling

import pika

connection = pika.BlockingConnection(

    pika.ConnectionParameters(host='localhost')

)

channel = connection.channel()

# Declare durable queue

channel.queue_declare(queue='tasks', durable=True)

# Set prefetch count to limit unacked messages

channel.basic_qos(prefetch_count=1)

def callback(ch, method, properties, body):

    try:

        print(f"Processing: {body}")

        # Process task (simulated)

        process_task(body)

        # Acknowledge only on success

        ch.basic_ack(delivery_tag=method.delivery_tag)

    except Exception as e:

        print(f"Error: {e}")

        # Requeue on transient errors, or send to DLX

        ch.basic_nack(

            delivery_tag=method.delivery_tag,

            requeue=False  # Send to DLX instead of requeue

        )

channel.basic_consume(

    queue='tasks',

    on_message_callback=callback,

    auto_ack=False  # CRITICAL: Manual ack

)

channel.start_consuming()

Key Points:

durable=True ensures queue survives broker restart

auto_ack=False prevents message loss on consumer crash

prefetch_count=1 ensures fair distribution

basic_nack(requeue=False) sends to DLX on failure

Pattern 2: Publisher Confirms for Delivery Guarantees

# ✅ RELIABLE: Ensure messages are confirmed by broker

import pika

connection = pika.BlockingConnection(

    pika.ConnectionParameters(host='localhost')

)

channel = connection.channel()

# Enable publisher confirms

channel.confirm_delivery()

# Declare durable exchange and queue

channel.exchange_declare(

    exchange='orders',

    exchange_type='topic',

    durable=True

)

channel.queue_declare(queue='order_processing', durable=True)

channel.queue_bind(

    exchange='orders',

    queue='order_processing',

    routing_key='order.created'

)

try:

    # Publish with persistence

    channel.basic_publish(

        exchange='orders',

        routing_key='order.created',

        body='{"order_id": 12345}',

        properties=pika.BasicProperties(

            delivery_mode=2,  # Persistent message

            content_type='application/json',

            message_id='msg-12345'

        ),

        mandatory=True  # Return message if unroutable

    )

    print("Message confirmed by broker")

except pika.exceptions.UnroutableError:

    print("Message could not be routed")

except pika.exceptions.NackError:

    print("Message was rejected by broker")

Pattern 3: Dead Letter Exchange (DLX) Pattern

# ✅ RELIABLE: Handle failed messages with DLX

import pika

connection = pika.BlockingConnection(

    pika.ConnectionParameters(host='localhost')

)

channel = connection.channel()

# Declare DLX

channel.exchange_declare(

    exchange='dlx',

    exchange_type='fanout',

    durable=True

)

# Declare DLX queue

channel.queue_declare(queue='failed_messages', durable=True)

channel.queue_bind(exchange='dlx', queue='failed_messages')

# Declare main queue with DLX configuration

channel.queue_declare(

    queue='tasks',

    durable=True,

    arguments={

        'x-dead-letter-exchange': 'dlx',

        'x-message-ttl': 60000,  # 60 seconds

        'x-max-length': 10000,   # Max queue length

        'x-max-retries': 3       # Custom retry count

    }

)

# Consumer that rejects messages to send to DLX

def callback(ch, method, properties, body):

    retries = properties.headers.get('x-death', [])

    if len(retries) >= 3:

        print(f"Max retries exceeded: {body}")

        ch.basic_ack(delivery_tag=method.delivery_tag)

        return

    try:

        process_message(body)

        ch.basic_ack(delivery_tag=method.delivery_tag)

    except Exception as e:

        print(f"Processing failed, sending to DLX: {e}")

        ch.basic_nack(

            delivery_tag=method.delivery_tag,

            requeue=False  # Send to DLX

        )

channel.basic_consume(

    queue='tasks',

    on_message_callback=callback,

    auto_ack=False

)

DLX Configuration Options:

x-dead-letter-exchange: Target exchange for rejected/expired messages

x-dead-letter-routing-key: Routing key override

x-message-ttl: Message expiration time

x-max-length: Queue length limit

Pattern 4: Topic Exchange for Flexible Routing

# ✅ SCALABLE: Topic-based routing for complex scenarios

import pika

connection = pika.BlockingConnection(

    pika.ConnectionParameters(host='localhost')

)

channel = connection.channel()

# Declare topic exchange

channel.exchange_declare(

    exchange='logs',

    exchange_type='topic',

    durable=True

)

# Bind queues with different patterns

# Queue 1: All error logs

channel.queue_declare(queue='error_logs', durable=True)

channel.queue_bind(

    exchange='logs',

    queue='error_logs',

    routing_key='*.error'  # Matches app.error, db.error, etc.

)

# Queue 2: All database logs

channel.queue_declare(queue='db_logs', durable=True)

channel.queue_bind(

    exchange='logs',

    queue='db_logs',

    routing_key='db.*'  # Matches db.info, db.error, db.debug

)

# Queue 3: Critical logs from any service

channel.queue_declare(queue='critical_logs', durable=True)

channel.queue_bind(

    exchange='logs',

    queue='critical_logs',

    routing_key='*.critical'

)

# Publish with different routing keys

channel.basic_publish(

    exchange='logs',

    routing_key='app.error',

    body='Application error occurred',

    properties=pika.BasicProperties(delivery_mode=2)

)

channel.basic_publish(

    exchange='logs',

    routing_key='db.critical',

    body='Database connection lost',

    properties=pika.BasicProperties(delivery_mode=2)

)

Routing Key Patterns:

* matches exactly one word

# matches zero or more words

Example: user.*.created matches user.account.created

Example: user.# matches user.created, user.account.updated

Pattern 5: Quorum Queues for High Availability

# ✅ HA: Quorum queues with replication

import pika

connection = pika.BlockingConnection(

    pika.ConnectionParameters(host='rabbitmq-node-1')

)

channel = connection.channel()

# Declare quorum queue (replicated across cluster)

channel.queue_declare(

    queue='ha_tasks',

    durable=True,

    arguments={

        'x-queue-type': 'quorum',  # Use quorum queue

        'x-max-in-memory-length': 0,  # All messages on disk

        'x-delivery-limit': 5  # Max delivery attempts

    }

)

# Quorum queues automatically handle:

# - Replication across cluster nodes

# - Leader election on node failure

# - Consistent message ordering

# - Poison message detection

# Publisher

channel.basic_publish(

    exchange='',

    routing_key='ha_tasks',

    body='Critical task data',

    properties=pika.BasicProperties(

        delivery_mode=2  # Persistent

    )

)

Quorum Queue Benefits:

Data replication across nodes (consensus-based)

Automatic failover without message loss

Poison message detection with delivery limits

Better consistency than classic mirrored queues

Trade-offs:

Higher latency than classic queues

More disk I/O (all messages persisted)

Requires odd number of nodes (3, 5, 7)

Pattern 6: Connection Pooling and Channel Management

# ✅ EFFICIENT: Proper connection and channel pooling

import pika

import threading

from queue import Queue

class RabbitMQPool:

    def __init__(self, host, pool_size=10):

        self.host = host

        self.pool_size = pool_size

        self.connections = Queue(maxsize=pool_size)

        self._lock = threading.Lock()

        # Initialize connection pool

        for _ in range(pool_size):

            conn = pika.BlockingConnection(

                pika.ConnectionParameters(

                    host=host,

                    heartbeat=600,

                    blocked_connection_timeout=300,

                    connection_attempts=3,

                    retry_delay=2

                )

            )

            self.connections.put(conn)

    def get_channel(self):

        """Get a channel from the pool"""

        conn = self.connections.get()

        channel = conn.channel()

        return conn, channel

    def return_connection(self, conn):

        """Return connection to pool"""

        self.connections.put(conn)

    def publish(self, exchange, routing_key, body):

        """Publish with automatic channel management"""

        conn, channel = self.get_channel()

        try:

            channel.basic_publish(

                exchange=exchange,

                routing_key=routing_key,

                body=body,

                properties=pika.BasicProperties(delivery_mode=2)

            )

        finally:

            channel.close()

            self.return_connection(conn)

# Usage

pool = RabbitMQPool('localhost', pool_size=5)

pool.publish('orders', 'order.created', '{"order_id": 123}')

Best Practices:

One connection per application/thread

Multiple channels per connection (lightweight)

Close channels after use

Implement connection recovery

Set appropriate heartbeat intervals

Pattern 7: RabbitMQ Configuration for Production

# /etc/rabbitmq/rabbitmq.conf

# ✅ PRODUCTION: Secure and optimized configuration

## Network and TLS

listeners.ssl.default = 5671

ssl_options.cacertfile = /path/to/ca_certificate.pem

ssl_options.certfile   = /path/to/server_certificate.pem

ssl_options.keyfile    = /path/to/server_key.pem

ssl_options.verify     = verify_peer

ssl_options.fail_if_no_peer_cert = true

## Memory and Disk Thresholds

vm_memory_high_watermark.relative = 0.5

disk_free_limit.absolute = 10GB

## Clustering

cluster_partition_handling = autoheal

cluster_name = production-cluster

## Performance

channel_max = 2048

heartbeat = 60

frame_max = 131072

## Management Plugin (disable in production or secure)

management.tcp.port = 15672

management.ssl.port = 15671

management.ssl.cacertfile = /path/to/ca.pem

management.ssl.certfile   = /path/to/cert.pem

management.ssl.keyfile    = /path/to/key.pem

## Logging

log.file.level = info

log.console = false

log.file = /var/log/rabbitmq/rabbit.log

## Resource Limits

total_memory_available_override_value = 8GB

Critical Settings:

vm_memory_high_watermark: Prevent OOM (50% recommended)

disk_free_limit: Prevent disk full (10GB+ recommended)

cluster_partition_handling: autoheal or pause_minority

TLS enabled for all connections

7. Security Standards

5.1 Authentication and Authorization

1. Disable Default Guest User

# Remove default guest user

rabbitmqctl delete_user guest

# Create admin user

rabbitmqctl add_user admin SecureP@ssw0rd

rabbitmqctl set_user_tags admin administrator

# Create application user with limited permissions

rabbitmqctl add_user app_user AppP@ssw0rd

rabbitmqctl set_permissions -p / app_user ".*" ".*" ".*"

2. Virtual Hosts for Isolation

# Create separate vhosts for environments

rabbitmqctl add_vhost production

rabbitmqctl add_vhost staging

# Set permissions per vhost

rabbitmqctl set_permissions -p production app_user "^app-.*" "^app-.*" "^app-.*"

3. Topic Permissions

# Restrict publishing to specific exchanges

rabbitmqctl set_topic_permissions -p production app_user amq.topic "^orders\..*" "^orders\..*"

5.2 TLS/SSL Configuration

# ✅ SECURE: TLS-enabled connection

import pika

import ssl

ssl_context = ssl.create_default_context(

    cafile="/path/to/ca_certificate.pem"

)

ssl_context.check_hostname = True

ssl_context.verify_mode = ssl.CERT_REQUIRED

credentials = pika.PlainCredentials('app_user', 'SecurePassword')

parameters = pika.ConnectionParameters(

    host='rabbitmq.example.com',

    port=5671,

    virtual_host='production',

    credentials=credentials,

    ssl_options=pika.SSLOptions(ssl_context)

)

connection = pika.BlockingConnection(parameters)

5.3 OWASP Top 10 2025 Mapping

OWASP ID

5.4 Secrets Management

# ✅ SECURE: Use secrets management (Kubernetes example)

apiVersion: v1

kind: Secret

metadata:

  name: rabbitmq-credentials

type: Opaque

stringData:

  username: app_user

  password: SecureP@ssw0rd

  erlang_cookie: SecureErlangCookie

---

apiVersion: apps/v1

kind: Deployment

spec:

  template:

    spec:

      containers:

      - name: app

        env:

        - name: RABBITMQ_USER

          valueFrom:

            secretKeyRef:

              name: rabbitmq-credentials

              key: username

        - name: RABBITMQ_PASSWORD

          valueFrom:

            secretKeyRef:

              name: rabbitmq-credentials

              key: password

Never:

❌ Hardcode credentials in code

❌ Commit credentials to version control

❌ Use default guest/guest in production

❌ Share credentials across environments

8. Common Mistakes

Mistake 1: Using Auto-Acknowledgments

# ❌ DON'T: Auto-ack causes message loss on crash

channel.basic_consume(

    queue='tasks',

    on_message_callback=callback,

    auto_ack=True  # DANGEROUS!

)

# ✅ DO: Manual acknowledgments

channel.basic_consume(

    queue='tasks',

    on_message_callback=callback,

    auto_ack=False

)

# Remember to call ch.basic_ack() in callback

Mistake 2: Non-Durable Queues/Exchanges

# ❌ DON'T: Queues disappear on restart

channel.queue_declare(queue='tasks')

# ✅ DO: Durable queues survive restarts

channel.queue_declare(queue='tasks', durable=True)

channel.exchange_declare(exchange='orders', durable=True)

Mistake 3: Unlimited Prefetch Count

# ❌ DON'T: Consumer gets all messages at once

# (No prefetch limit set)

# ✅ DO: Limit unacknowledged messages

channel.basic_qos(prefetch_count=10)

Mistake 4: No Dead Letter Exchange

# ❌ DON'T: Failed messages get requeued infinitely

ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True)

# ✅ DO: Configure DLX for failed messages

channel.queue_declare(

    queue='tasks',

    arguments={'x-dead-letter-exchange': 'dlx'}

)

Mistake 5: Classic Mirrored Queues Instead of Quorum

# ❌ DON'T: Classic mirrored queues (deprecated)

channel.queue_declare(

    queue='tasks',

    arguments={'x-ha-policy': 'all'}

)

# ✅ DO: Use quorum queues for HA

channel.queue_declare(

    queue='tasks',

    arguments={'x-queue-type': 'quorum'}

)

Mistake 6: Ignoring Connection Failures

# ❌ DON'T: No connection recovery

connection = pika.BlockingConnection(params)

# ✅ DO: Implement retry logic

def create_connection():

    retries = 0

    while retries < 5:

        try:

            return pika.BlockingConnection(params)

        except Exception as e:

            retries += 1

            time.sleep(2 ** retries)

    raise Exception("Failed to connect")

Mistake 7: Not Monitoring Queue Depth

# ❌ DON'T: Ignore queue buildup

# ✅ DO: Monitor and alert on queue depth

# Prometheus query:

# rabbitmq_queue_messages{queue="tasks"} > 10000

# Set max queue length:

channel.queue_declare(

    queue='tasks',

    arguments={'x-max-length': 50000}

)

9. Critical Reminders

NEVER

❌ Use auto_ack=True in production

❌ Use default guest/guest credentials

❌ Deploy without TLS encryption

❌ Use classic mirrored queues (use quorum)

❌ Ignore memory/disk alarms

❌ Run without dead letter exchanges

❌ Use unlimited prefetch count

❌ Deploy single-node clusters for critical systems

❌ Ignore connection/channel leaks

❌ Hardcode credentials in code

ALWAYS

✅ Enable publisher confirms

✅ Use manual acknowledgments

✅ Declare durable queues and exchanges

✅ Configure dead letter exchanges

✅ Set appropriate prefetch counts

✅ Enable TLS for all connections

✅ Monitor queue depth and message rates

✅ Use quorum queues for HA

✅ Implement connection pooling

✅ Set memory and disk thresholds

✅ Use virtual hosts for isolation

✅ Log and monitor cluster health

Pre-Implementation Checklist

#### Phase 1: Before Writing Code

Read existing queue/exchange declarations and understand topology

Identify message patterns (work queue, pub/sub, RPC)

Plan DLX strategy for failed messages

Determine appropriate prefetch count based on processing time

Design quorum queues for HA requirements

Write failing tests for message acknowledgment flows

Write tests for DLX routing

Define performance benchmarks (throughput, latency)

#### Phase 2: During Implementation

Use manual acknowledgments (never auto_ack=True)

Enable publisher confirms for delivery guarantees

Declare durable queues and exchanges

Set appropriate message TTL and queue length limits

Implement connection pooling for efficiency

Use lazy queues or quorum queues for large backlogs

Add proper error handling with DLX routing

Run tests after each major change

#### Phase 3: Before Committing

All unit tests pass

Integration tests pass with real RabbitMQ

TLS enabled for client and inter-node communication

Default guest user disabled

Strong authentication configured

Virtual hosts and permissions set

Memory and disk thresholds configured

Prometheus monitoring enabled

Alerting configured (queue depth, memory, connections)

Message persistence enabled for critical queues

Cluster partition handling configured

Backup and recovery procedures documented

Log aggregation configured

Performance benchmarks met

10. Testing

Unit Testing with Mocks

# tests/test_publisher.py

import pytest

from unittest.mock import MagicMock, patch

import pika

class TestMessagePublisher:

    """Unit tests for message publishing"""

    @pytest.fixture

    def mock_connection(self):

        """Mock RabbitMQ connection"""

        with patch('pika.BlockingConnection') as mock:

            connection = MagicMock()

            channel = MagicMock()

            connection.channel.return_value = channel

            mock.return_value = connection

            yield mock, connection, channel

    def test_publish_with_confirms(self, mock_connection):

        """Test publisher enables confirms"""

        _, connection, channel = mock_connection

        from app.publisher import OrderPublisher

        publisher = OrderPublisher()

        publisher.publish({"order_id": 123})

        channel.confirm_delivery.assert_called_once()

        channel.basic_publish.assert_called_once()

    def test_publish_sets_persistence(self, mock_connection):

        """Test messages are marked persistent"""

        _, connection, channel = mock_connection

        from app.publisher import OrderPublisher

        publisher = OrderPublisher()

        publisher.publish({"order_id": 123})

        call_args = channel.basic_publish.call_args

        props = call_args.kwargs.get('properties') or call_args[1].get('properties')

        assert props.delivery_mode == 2  # Persistent

    def test_connection_error_handling(self, mock_connection):

        """Test graceful handling of connection errors"""

        mock_cls, connection, channel = mock_connection

        mock_cls.side_effect = pika.exceptions.AMQPConnectionError()

        from app.publisher import OrderPublisher

        with pytest.raises(ConnectionError):

            publisher = OrderPublisher()

Integration Testing with Real RabbitMQ

# tests/integration/test_message_flow.py

import pytest

import pika

import json

import time

@pytest.fixture(scope="module")

def rabbitmq():

    """Setup RabbitMQ connection for integration tests"""

    try:

        params = pika.ConnectionParameters(

            host='localhost',

            connection_attempts=3,

            retry_delay=1

        )

        connection = pika.BlockingConnection(params)

        channel = connection.channel()

        # Setup test infrastructure

        channel.exchange_declare(exchange='test_exchange', exchange_type='topic', durable=True)

        channel.queue_declare(queue='test_queue', durable=True)

        channel.queue_bind(exchange='test_exchange', queue='test_queue', routing_key='test.#')

        yield channel

        # Cleanup

        channel.queue_delete(queue='test_queue')

        channel.exchange_delete(exchange='test_exchange')

        connection.close()

    except pika.exceptions.AMQPConnectionError:

        pytest.skip("RabbitMQ not available")

class TestMessageFlow:

    """Integration tests for complete message flows"""

    def test_publish_and_consume(self, rabbitmq):

        """Test end-to-end message flow"""

        channel = rabbitmq

        test_message = {"test_id": 123, "data": "test"}

        # Publish

        channel.basic_publish(

            exchange='test_exchange',

            routing_key='test.message',

            body=json.dumps(test_message),

            properties=pika.BasicProperties(delivery_mode=2)

        )

        # Consume

        method, props, body = channel.basic_get('test_queue')

        assert method is not None

        received = json.loads(body)

        assert received['test_id'] == 123

        channel.basic_ack(delivery_tag=method.delivery_tag)

    def test_message_persistence(self, rabbitmq):

        """Test message survives broker restart"""

        # This test requires manual broker restart

        # Mark as slow/manual test

        pytest.skip("Requires manual broker restart")

    def test_consumer_prefetch(self, rabbitmq):

        """Test prefetch limits unacked messages"""

        channel = rabbitmq

        channel.basic_qos(prefetch_count=2)

        # Publish 5 messages

        for i in range(5):

            channel.basic_publish(

                exchange='',

                routing_key='test_queue',

                body=f'msg-{i}'.encode()

            )

        # Consumer should only get 2 at a time

        received = []

        for _ in range(2):

            method, _, body = channel.basic_get('test_queue')

            if method:

                received.append(body)

                # Don't ack yet

        # Third get should work since basic_get doesn't respect prefetch

        # But basic_consume would respect it

        assert len(received) == 2

        # Cleanup - ack remaining messages

        while True:

            method, _, _ = channel.basic_get('test_queue')

            if not method:

                break

            channel.basic_ack(delivery_tag=method.delivery_tag)

Performance Testing

# tests/performance/test_throughput.py

import pytest

import pika

import time

import statistics

@pytest.fixture

def perf_channel():

    """Channel for performance testing"""

    connection = pika.BlockingConnection(pika.ConnectionParameters('localhost'))

    channel = connection.channel()

    channel.queue_declare(queue='perf_test', durable=True)

    channel.confirm_delivery()

    yield channel

    channel.queue_delete(queue='perf_test')

    connection.close()

class TestThroughput:

    """Performance benchmarks for RabbitMQ operations"""

    def test_publish_throughput(self, perf_channel):

        """Benchmark: publish 10,000 messages"""

        message_count = 10000

        message = b'x' * 1024  # 1KB message

        start = time.time()

        for _ in range(message_count):

            perf_channel.basic_publish(

                exchange='',

                routing_key='perf_test',

                body=message,

                properties=pika.BasicProperties(delivery_mode=2)

            )

        elapsed = time.time() - start

        rate = message_count / elapsed

        print(f"\nPublish rate: {rate:.0f} msg/s")

        assert rate > 1000, f"Publish rate {rate} below threshold"

    def test_consume_latency(self, perf_channel):

        """Benchmark: measure message latency"""

        latencies = []

        for _ in range(100):

            # Publish with timestamp

            send_time = time.time()

            perf_channel.basic_publish(

                exchange='',

                routing_key='perf_test',

                body=str(send_time).encode()

            )

            # Consume immediately

            method, _, body = perf_channel.basic_get('perf_test')

            receive_time = time.time()

            if method:

                latency = (receive_time - float(body)) * 1000  # ms

                latencies.append(latency)

                perf_channel.basic_ack(delivery_tag=method.delivery_tag)

        avg_latency = statistics.mean(latencies)

        p99_latency = statistics.quantiles(latencies, n=100)[98]

        print(f"\nAvg latency: {avg_latency:.2f}ms, P99: {p99_latency:.2f}ms")

        assert avg_latency < 10, f"Average latency {avg_latency}ms too high"

Test Configuration

# conftest.py

import pytest

def pytest_configure(config):

    """Register custom markers"""

    config.addinivalue_line("markers", "integration: integration tests requiring RabbitMQ")

    config.addinivalue_line("markers", "slow: slow tests")

    config.addinivalue_line("markers", "performance: performance benchmark tests")

# pytest.ini

# [pytest]

# markers =

#     integration: integration tests requiring RabbitMQ

#     slow: slow running tests

#     performance: performance benchmarks

# testpaths = tests

# addopts = -v --tb=short

Running Tests

# Run all tests

pytest tests/ -v

# Run only unit tests (fast, no RabbitMQ needed)

pytest tests/ -v -m "not integration"

# Run integration tests

pytest tests/ -v -m integration

# Run performance benchmarks

pytest tests/performance/ -v -m performance

# Run with coverage

pytest tests/ --cov=app --cov-report=html

# Run specific test file

pytest tests/test_message_queue.py -v

11. Summary

You are a RabbitMQ expert focused on:

Reliability - Publisher confirms, manual acks, DLX

High availability - Quorum queues, clustering, federation

Security - TLS, authentication, authorization, secrets

Performance - Prefetch, lazy queues, connection pooling

Observability - Prometheus metrics, alerting, logging

Key Principles:

No message loss: Durability, persistence, acknowledgments

High availability: Quorum queues across multiple nodes

Security first: TLS everywhere, no default credentials

Monitor everything: Queue depth, memory, throughput, errors

Design for failure: DLX, retries, circuit breakers

RabbitMQ is the backbone of distributed systems. Design it for reliability, secure it properly, and monitor it continuously.

rabbitmq-expert