Database 'thundering herd' during mass reconnection after cluster failure


## Environment

- ejabberd version: 24.12
- Erlang version: `14.2.1`
- OS: Linux (Debian)
- Installed from: docker

## Configuration 

```yaml
loglevel: info
log_rotate_size: 1048576000
log_rotate_count: 7

certfiles:
  - "/home/ejabberd/conf/domain.pem"
ca_file: "/home/ejabberd/conf/cacert.pem"

sql_type: mysql
sql_pool_size: 20
sql_keepalive_interval: 1
sql_start_interval: 5
new_sql_schema: false
update_sql_schema: false
default_db: sql
default_ram_db: mnesia
auth_method: sql

allow_contrib_modules: true
cache_size: 20000
max_fsm_queue: 30000


listen:
  - port: 5222
    ip: "::"
    module: ejabberd_c2s
    protocol_options:
      - "no_sslv2"
      - "no_sslv3"
      - "no_tlsv1"
      - "no_tlsv1_1"
    ciphers: "my cipher"
    starttls: true
    starttls_required: false
    tls_compression: false
    max_stanza_size: 262144
    shaper: c2s_shaper
    access: c2s
    use_proxy_protocol: false

  - port: 5269
    ip: "::"
    module: ejabberd_s2s_in
    max_stanza_size: 524288

  - port: 5443
    ip: "::"
    module: ejabberd_http
    request_handlers:
      /upload: mod_my_http_upload
      /ws: ejabberd_http_ws
      /bosh: mod_bosh
    captcha: false
    use_proxy_protocol: false
    tls: false

  - port: 5280
    ip: "::"
    module: ejabberd_http
    request_handlers:
      /admin: ejabberd_web_admin
      /api/v0: mod_http_api
      /api: mod_http_api
    captcha: false
    use_proxy_protocol: false
    tls: false

  - port: 9334
    ip: "::"
    module: ejabberd_http
    request_handlers:
      /metrics: mod_prometheus
    captcha: false
    use_proxy_protocol: false
    tls: false

s2s_use_starttls: optional


acl:
  local:
    user_regexp: ""
  loopback:
    ip:
      - 127.0.0.0/8
      - ::1/128
  admin:
    user:
      - "admin@myserver.com"


access_rules:
  local:
    allow: local
  c2s:
    deny: blocked
    allow: all
  announce:
    allow: admin
  configure:
    allow: admin
  muc_create:
    allow: local
  pubsub_createnode:
    allow: local
  register:
    allow: local
  trusted_network:
    allow: loopback


api_permissions:
  "console commands":
    from:
      - ejabberd_ctl
    who: all
    what: "*"
  "admin access":
    who:
      - ip: "127.0.0.0/8"
      - access:
        - allow:
          - acl: loopback
          - acl: admin
    what:
      - "*"
      - "!stop"
      - "!start"
  "public commands":
    who:
      ip: 127.0.0.1/8
    what:
      - status
      - connected_users_number


shaper:
  normal:
    rate: 3000
    burst_size: 20000
  fast: 100000


shaper_rules:
  max_user_sessions: 10
  max_user_offline_messages:
    5000: admin
    500: all
  c2s_shaper:
    none: admin
    normal: all
  s2s_shaper: fast


acme:
  auto: false
  contact: "mailto:hostmaster@myserver.com"
  ca_url: "https://acme-v02.api.letsencrypt.org/directory"


modules:
  mod_adhoc: {}
  # mod_admin_update_sql: {}
  mod_admin_extra: {}
  mod_announce:
    access: announce

  mod_avatar: {}
  mod_blocking: {}
  mod_bosh: {}
  mod_caps:
    use_cache: true

  mod_carboncopy: {}
  mod_client_state: {}
  mod_configure: {}
  mod_disco: {}
  # mod_fail2ban: {}
  mod_http_api: 
    default_version: 0
  # mod_http_upload: {}
  mod_last: {}
  mod_mam:
    ## Mnesia is limited to 2GB, better to use an SQL backend
    ## For small servers SQLite is a good fit and is very easy
    ## to configure. Uncomment this when you have SQL configured:
    db_type: sql
    assume_mam_usage: true
    default: always
    user_mucsub_from_muc_archive: true

  # mod_mqtt: {}
  mod_muc:
    access:
      - allow
    access_admin:
      - allow: admin
    access_create: 
      - allow: admin
    access_persistent: muc_create
    access_mam:
      - allow
    default_room_options:
      allow_user_invites: false
      allow_subscription: true
      allow_change_subj: false
      allow_query_users: true
      allowpm: anyone
      mam: true
      members_by_default: true
      members_only: false
      logging: true
      persistent: true
      anonymous: false
      public: false
      presence_broadcast:
        - visitor
    history_size: 0
    max_users: 5000
    max_user_conferences: 5000
    preload_rooms: false

  mod_muc_admin: {
    subscribe_room_many_max_users: 4000
  }
  mod_offline:
    access_max_user_messages: max_user_offline_messages

  mod_ping:
    send_pings: true
    ping_interval: 60
    ping_ack_timeout: 30
    timeout_action: kill

  mod_privacy: {}
  # mod_private: {}
  mod_proxy65:
    access: local
    max_connections: 5

  mod_pubsub:
    access_createnode: pubsub_createnode
    ## reduces resource comsumption, but XEP incompliant
    ignore_pep_from_offline: false
    ## XEP compliant, but increases resource comsumption
    ## ignore_pep_from_offline: false
    last_item_cache: false
    max_items_node: 1000
    plugins:
      - flat
      - pep
    force_node_config:
      ## Change from "whitelist" to "open" to enable OMEMO support
      ## See https://github.com/processone/ejabberd/issues/2425
      "eu.siacs.conversations.axolotl.*":
        access_model: whitelist
      ## Avoid buggy clients to make their bookmarks public
      storage:bookmarks:
        access_model: whitelist
      service:roster:x:
        access_model: presence
        notification_type: normal

  mod_push: {}
  mod_push_keepalive: {}
  mod_register:
    ## Only accept registration requests from the "trusted"
    ## network (see access_rules section above).
    ## Think twice before enabling registration from any
    ## address. See the Jabber SPAM Manifesto for details:
    ## https://github.com/ge0rg/jabber-spam-fighting-manifesto
    ip_access: trusted_network

  mod_roster:
    versioning: true
    store_current_id: true
    db_type: sql
    cache_size: 80000

  mod_s2s_dialback: {}
  mod_shared_roster: {}
  mod_stream_mgmt:
    resend_on_timeout: true
    resume_timeout: 30
    max_ack_queue: 10000

  # mod_stun_disco: {}
  mod_vcard: {}
  mod_vcard_xupdate: {}
  mod_version:
    show_os: false
...
```


## Bug description
We are experiencing a critical "thundering herd" scenario following cluster instability. When a cluster node fails and users are disconnected, if the outage exceeds the resume_timeout period, all clients attempt a full reconnection simultaneously once the service becomes available.
This results in a massive spike in database load (CPU and Memory) as thousands of clients perform full authentication and session binding at the same time. In our recent incident, the database became completely unresponsive (choked), leading to a "reconnection recycle" loop where the DB couldn't process enough requests to stabilize the cluster.
Resolution Observed:
We were only able to stabilize the system after scaling our DB resources (CPU and Memory) by 3x to handle the initial surge.


I see that for each reconnection, the following queries are being run in the DB: 
```
2025-06-10 15:30:38.213780+00:00 [debug] <0.646.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select version from roster_version where username = 'username' and 0=0"
2025-06-10 15:30:38.246180+00:00 [debug] <0.648.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "begin;"
2025-06-10 15:30:38.246621+00:00 [debug] <0.648.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select username, xml from spool where username='username' and 0=0 order by seq;"
2025-06-10 15:30:38.247114+00:00 [debug] <0.648.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "delete from spool where username='username' and 0=0;"
2025-06-10 15:30:38.247336+00:00 [debug] <0.648.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "commit;"
2025-06-10 15:30:38.247600+00:00 [debug] <0.650.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select username, jid, nick, subscription, ask, askmessage, server, subscribe, type from rosterusers where username='username' and 0=0"
2025-06-10 15:30:38.248444+00:00 [debug] <0.652.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select jid, grp from rostergroups where username='username' and 0=0"
2025-06-10 15:30:38.249294+00:00 [debug] <0.654.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select node, plugin, i.nodeid, jid, subscriptions from pubsub_state i, pubsub_node n where i.nodeid = n.nodeid and jid in ('username@myserver.com/6186', 'username@myserver.com') and host='pubsub.myserver.com'"
2025-06-10 15:30:38.249978+00:00 [debug] <0.654.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select name, val from pubsub_node_option where nodeid=6354"
2025-06-10 15:30:38.250523+00:00 [debug] <0.675.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select itemid, publisher, creation, modification, payload from pubsub_item where nodeid='6354' order by modification desc  limit 1"
2025-06-10 15:30:38.252298+00:00 [debug] <0.617.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select host, node, plugin, i.nodeid, jid, subscriptions from pubsub_state i, pubsub_node n where i.nodeid = n.nodeid and jid in ('username@myserver.com/186','username@myserver.com') and host like '%@myserver.com' "
2025-06-10 15:30:38.252743+00:00 [debug] <0.617.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select name, val from pubsub_node_option where nodeid=11939"
2025-06-10 15:30:38.253870+00:00 [debug] <0.617.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select name, val from pubsub_node_option where nodeid=11940"
2025-06-10 15:30:38.254270+00:00 [debug] <0.620.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select username, jid, nick, subscription, ask, askmessage, server, subscribe, type from rosterusers where username='username' and 0=0"
2025-06-10 15:30:38.255223+00:00 [debug] <0.622.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select jid, grp from rostergroups where username='username' and 0=0"
2025-06-10 15:30:38.301391+00:00 [debug] <0.624.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select node, parent, plugin, nodeid from pubsub_node where host='username@myserver.com' and node='storage:bookmarks'"
2025-06-10 15:30:38.305027+00:00 [debug] <0.626.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select subscription, ask from rosterusers where username='username' and 0=0 and jid='username@myserver.com'"
2025-06-10 15:30:38.306016+00:00 [debug] <0.628.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select node, parent, plugin, nodeid from pubsub_node where host='username@myserver.com'"
2025-06-10 15:30:38.306543+00:00 [debug] <0.628.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select name, val from pubsub_node_option where nodeid=11939"
2025-06-10 15:30:38.306958+00:00 [debug] <0.628.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select name, val from pubsub_node_option where nodeid=11940"
2025-06-10 15:30:38.307450+00:00 [debug] <0.630.0>@ejabberd_sql:sql_query_internal/1:760 SQL: "select itemid, publisher, creation, modification, payload from pubsub_item where nodeid='11939'"
```

Is there and configuration or something else we can do to avoid this situation?

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Database 'thundering herd' during mass reconnection after cluster failure #4509

Environment

Configuration

Bug description

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Database 'thundering herd' during mass reconnection after cluster failure #4509

Description

Environment

Configuration

Bug description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions