Vonng/pigsty

Citus cluster HA not working

Opened this issue · 1 comment

Environment

Four machines in total:

  • 1 * coordinator master
  • 1 * coordinator slave (streaming replication)
  • 2 * Citus data nodes

Local tables work as expected

  1. Create a local table test on the coordinator master; the coordinator slave replicates the contents of test correctly (a sketch of this check follows the list).
    1. After the master is stopped, the slave can read and write the test table normally.
    2. After the master comes back up, the changes made to the test table are synced back to it correctly.
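
For reference, a minimal sketch of that check (the table name test is from the report; the sample row is illustrative):

-- on the coordinator master
create table test(id int primary key, name varchar);
insert into test values (1, 'foo');
-- on the coordinator slave: the row should arrive via streaming replication
select * from test;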

Citus distributed tables fail

  1. Create a Citus distributed table test2 on the coordinator master (SQL below); reads and writes on the master work fine.
    1. The slave cannot read the table; it reports ERROR: connection to the remote node 192.168.12.8:5432 failed with the following error: fe_sendauth: no password supplied
    2. With the master stopped, the slave still cannot read test2; same error as above.
    3. After the master is restarted, the master instance reports the same error as well, and the two data nodes can only serve read-only (ro) traffic.
-- create a Citus distributed table
set citus.shard_count=2;
set citus.shard_replication_factor=2;
create table test2(id int primary key, name varchar);
SELECT create_distributed_table('test2', 'id', 'hash');
insert into test2 select id,md5(random()::text) from generate_series(1,500) as id;
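
Reading the distributed table from the coordinator slave then reproduces the error quoted above:

-- on the coordinator slave (hot standby)
select count(*) from test2;
-- ERROR: connection to the remote node 192.168.12.8:5432 failed with the following error: fe_sendauth: no password supplied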

Expected result

HA for Citus distributed tables works correctly.

pigsty.yml configuration

all:
  children:

    #----------------------------------#
    # infra: monitor, alert, repo, etc..
    #----------------------------------#
    infra: { hosts: { 192.168.12.6: { infra_seq: 1 } }}
      #hosts:
       # 192.168.12.6: { infra_seq: 1 }  # infra_seq required
       # 192.168.12.7: { infra_seq: 2 }  # assign from 1 ~ n
       # 192.168.12.8: { infra_seq: 3 }


    #----------------------------------#
    # etcd cluster for HA postgres DCS
    #----------------------------------#
    etcd: # { hosts: { 192.168.12.6: { etcd_seq: 1 } }, vars: { etcd_cluster: etcd } }
      hosts:  # 1 node for testing, 3 or 5 for production
        192.168.12.6: { etcd_seq: 1 }  # etcd_seq required
        192.168.12.7: { etcd_seq: 2 }  # assign from 1 ~ n
        192.168.12.8: { etcd_seq: 3 }  # odd number please
      vars: # cluster level parameter override roles/etcd
        etcd_cluster: etcd  # mark etcd cluster name etcd

    #----------------------------------#
    # minio (OPTIONAL backup repo)
    #----------------------------------#
    #minio: { hosts: { 192.168.12.6: { minio_seq: 1 } }, vars: { minio_cluster: minio } }

    pg-citus0:
      hosts:
        192.168.12.6: { pg_seq: 1, pg_role: primary }
        192.168.12.5: { pg_seq: 2, pg_role: replica }
      vars:
        pg_cluster: pg-citus0
        pg_group: 0
        pg_mode: citus                    # pgsql cluster mode: citus
        pg_shard: pg-citus                # citus shard name: pg-citus
        patroni_citus_db: meta            # citus distributed database name
        pg_dbsu_password: DBUser.Postgres # all dbsu password access for citus cluster
        pg_vip_enabled: true
        pg_vip_address: 192.168.12.100/16
        pg_vip_interface: eth0
        pg_extensions: [ 'citus_${pg_version}* postgis33_${pg_version}* timescaledb-2-postgresql-${pg_version}* pgvector_${pg_version}*' ]
        pg_libs: 'citus, timescaledb, pg_stat_statements, auto_explain' # citus will be added by patroni automatically
        pg_users:  [{ name: test_admin , password: test_admin , pgbouncer: true , roles: [ dbrole_admin ] }, { name: test_rw , password: test_rw , pgbouncer: true , roles: [ dbrole_readwrite ] }, { name: test_ro , password: test_ro , pgbouncer: true , roles: [ dbrole_readonly ] }]
        pg_databases:  [{ name: haha-test ,owner: test_admin ,extensions: [{name: citus}, {name: postgis}] }]
        pg_hba_rules:
          - { user: 'all' ,db: all  ,addr: 0.0.0.0/0 ,auth: trust ,title: 'trust citus cluster members'       }
          - { user: 'all' ,db: all  ,addr: 127.0.0.1/32  ,auth: ssl   ,title: 'all user ssl access from localhost' }
          - { user: 'all' ,db: all  ,addr: intra         ,auth: ssl   ,title: 'all user ssl access from intranet'  }

    pg-citus1: # citus data node 1, pg_group = 1
      hosts:
        192.168.12.7: { pg_seq: 1, pg_role: primary }
      vars:
        pg_cluster: pg-citus1
        pg_group: 1
        pg_mode: citus                    # pgsql cluster mode: citus
        pg_shard: pg-citus                # citus shard name: pg-citus
        patroni_citus_db: meta             # citus distributed database name
        pg_dbsu_password: DBUser.Postgres # all dbsu password access for citus cluster
        pg_vip_enabled: false
        pg_vip_address: 192.168.12.101/16
        pg_vip_interface: eth0
        pg_extensions: [ 'citus_${pg_version}* postgis33_${pg_version}* timescaledb-2-postgresql-${pg_version}* pgvector_${pg_version}*' ]
        pg_libs: 'citus, timescaledb, pg_stat_statements, auto_explain' # citus will be added by patroni automatically
        pg_users:  [{ name: test_admin , password: test_admin , pgbouncer: true , roles: [ dbrole_admin ] }, { name: test_rw , password: test_rw , pgbouncer: true , roles: [ dbrole_readwrite ] }, { name: test_ro , password: test_ro , pgbouncer: true , roles: [ dbrole_readonly ] }]
        pg_databases:  [{ name: haha-test ,owner: test_admin ,extensions: [{name: citus}, {name: postgis}] }]
        pg_hba_rules:
          - { user: 'all' ,db: all  ,addr: 0.0.0.0/0 ,auth: trust ,title: 'trust citus cluster members'        }
          - { user: 'all' ,db: all  ,addr: 127.0.0.1/32  ,auth: ssl   ,title: 'all user ssl access from localhost' }
          - { user: 'all' ,db: all  ,addr: intra         ,auth: ssl   ,title: 'all user ssl access from intranet'  }

    pg-citus2: # citus data node 2, pg_group = 2
      hosts:
        192.168.12.8: { pg_seq: 1, pg_role: primary }
      vars:
        pg_cluster: pg-citus2
        pg_group: 2
        pg_mode: citus                    # pgsql cluster mode: citus
        pg_shard: pg-citus                # citus shard name: pg-citus
        patroni_citus_db: meta             # citus distributed database name
        pg_dbsu_password: DBUser.Postgres # all dbsu password access for citus cluster
        pg_vip_enabled: false
        pg_vip_address: 192.168.12.102/16
        pg_vip_interface: eth0
        pg_extensions: [ 'citus_${pg_version}* postgis33_${pg_version}* timescaledb-2-postgresql-${pg_version}* pgvector_${pg_version}*' ]
        pg_libs: 'citus, timescaledb, pg_stat_statements, auto_explain' # citus will be added by patroni automatically
        pg_users:  [{ name: test_admin , password: test_admin , pgbouncer: true , roles: [ dbrole_admin ] }, { name: test_rw , password: test_rw , pgbouncer: true , roles: [ dbrole_readwrite ] }, { name: test_ro , password: test_ro , pgbouncer: true , roles: [ dbrole_readonly ] }]
        pg_databases:  [{ name: haha-test ,owner: test_admin ,extensions: [{name: citus}, {name: postgis}] }]
        pg_hba_rules:
          - { user: 'all' ,db: all  ,addr: 0.0.0.0/0 ,auth: trust ,title: 'trust citus cluster members'        }
          - { user: 'all' ,db: all  ,addr: 127.0.0.1/32  ,auth: ssl   ,title: 'all user ssl access from localhost' }
          - { user: 'all' ,db: all  ,addr: intra         ,auth: ssl   ,title: 'all user ssl access from intranet'  }


  #----------------------------------#
  # GLOBAL VARS
  #----------------------------------#
  vars:                               # global parameters
    version: v2.6.0                   # pigsty version string
    admin_ip: 192.168.12.6             # admin node ip address
    region: china                     # upstream mirror region: default,china,europe
    infra_portal:                     # domain names and upstream servers
      home         : { domain: h.pigsty }
      grafana      : { domain: g.pigsty ,endpoint: "${admin_ip}:3000" , websocket: true }
      prometheus   : { domain: p.pigsty ,endpoint: "${admin_ip}:9090" }
      alertmanager : { domain: a.pigsty ,endpoint: "${admin_ip}:9093" }
      blackbox     : { endpoint: "${admin_ip}:9115" }
      loki         : { endpoint: "${admin_ip}:3100" }
      #minio        : { domain: sss.pigsty  ,endpoint: "${admin_ip}:9001" ,scheme: https ,websocket: true }
      
......

I'll look into it. Usually this is related to pg_dbsu_password and .pgpass.
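
For anyone hitting the same issue, a starting point suggested by the hint above (an assumption, not a confirmed fix): Citus routes queries from the coordinator to the workers as the database superuser, so each node needs a ~/.pgpass entry for the dbsu that covers the other cluster members. With the default dbsu postgres and the pg_dbsu_password from the config above, the entry on the coordinator replica would look roughly like:

# ~/.pgpass for the postgres OS user (must be chmod 0600)
# format: hostname:port:database:username:password
*:5432:*:postgres:DBUser.Postgres

fe_sendauth: no password supplied is the libpq error raised when the server requests a password and the client has none to send, so a missing or unreadable .pgpass on the replica would produce exactly this symptom.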