actiontech/dtle

API: /v2/job/migration/detail?job_id=xxx lists incorrect "dtle_node_infos"

asiroliu opened this issue · 3 comments

Description

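API /v2/job/migration/detail?job_id=xxx lists an incorrect node in "dtle_node_infos": the dest task is reported with the src task's node_id and node_addr instead of the node it was created on.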

Steps to reproduce the issue

  1. Create a dtle job whose src and dest tasks use different nodes
{
  "job_id": "multiple_to_one_incr_1",
  "is_password_encrypted": false,
  "task_step_name": "all",
  "failover": true,
  "retry": 2,
  "src_task": {
    "task_name": "src",
    "node_id": "2f3e31ed-09b3-1c1f-a348-e2ac4963f70a",
    "mysql_src_task_config": {
      "gtid": "",
      "binlog_relay": false
    },
    "drop_table_if_exists": true,
    "skip_create_db_table": false,
    "repl_chan_buffer_size": 120,
    "chunk_size": 2000,
    "group_max_size": 1,
    "group_timeout": 100,
    "connection_config": {
      "database_type": "MySQL",
      "host": "172.100.9.3",
      "port": 3306,
      "user": "test_src",
      "password": "test_src"
    },
    "replicate_do_db": [
      {
        "table_schema": "action_db",
        "tables": [
          {
            "table_name": "tb_1"
          }
        ]
      }
    ]
  },
  "dest_task": {
    "task_name": "dest",
    "node_id": "ed8eb296-c200-edc7-53a0-69003ca6115f",
    "mysql_dest_task_config": {
      "use_my_sql_dependency": false,
      "dependency_history_size": 2500,
      "parallel_workers": 32
    },
    "connection_config": {
      "database_type": "MySQL",
      "host": "172.100.9.1",
      "port": 3306,
      "user": "test_dest",
      "password": "test_dest"
    }
  }
}
  2. Check the job detail
$ curl -X GET "http://10.186.16.109:18190/v2/job/migration/detail?job_id=multiple_to_one_incr_1-migration" -H "accept: application/json"

Responses:

{
  "basic_task_profile": {
    "job_base_info": {
      "job_id": "multiple_to_one_incr_1-migration",
      "subscription_topic": "",
      "job_status": "running",
      "job_create_time": "2022-12-07T17:30:28+08:00",
      "job_steps": [
        {
          "step_name": "job_stage_full",
          "step_status": "start",
          "step_schedule": 0,
          "job_create_time": "2022-12-07T17:30:28+08:00"
        },
        {
          "step_name": "job_stage_incr",
          "step_status": "start",
          "step_schedule": 0,
          "job_create_time": "2022-12-07T17:30:28+08:00"
        }
      ],
      "delay": 0,
      "dump_progress": null
    },
    "dtle_node_infos": [
      {
        "node_addr": "172.100.9.13",
        "node_id": "2f3e31ed-09b3-1c1f-a348-e2ac4963f70a",
        "data_source": "172.100.9.3:3306",
        "source": "src"
      },
      {
        "node_addr": "172.100.9.13",
        "node_id": "2f3e31ed-09b3-1c1f-a348-e2ac4963f70a",
        "data_source": "172.100.9.1:3306",
        "source": "dest"
      }
    ],
    "connection_info": {
      "src_data_base": {
        "host": "172.100.9.3",
        "port": 3306,
        "user": "test_src",
        "password": "*",
        "service_name": "",
        "database_type": "MySQL"
      },
      "dst_data_base": {
        "host": "172.100.9.1",
        "port": 3306,
        "user": "test_dest",
        "password": "*",
        "service_name": "",
        "database_type": "MySQL"
      },
      "dst_kafka": {
        "task_name": "",
        "kafka_broker_addrs": null,
        "kafka_topic": "",
        "message_group_max_size": 0,
        "message_group_timeout": 0
      }
    },
    "configuration": {
      "fail_over": true,
      "retry_times": 2,
      "src_config": {
        "skip_create_db_table": false,
        "drop_table_if_exists": true,
        "mysql_src_task_config": {
          "expand_syntax_support": false,
          "gtid": "",
          "binlog_relay": false,
          "wait_on_job": "",
          "auto_gtid": false,
          "dump_entry_limit": 0,
          "two_way_sync": false,
          "two_way_sync_gtid": ""
        },
        "oracle_src_task_config": null,
        "group_max_size": 1,
        "group_timeout": 100,
        "repl_chan_buffer_size": 120,
        "chunk_size": 2000,
        "sql_filter": null
      },
      "dst_config": {
        "mysql_dest_task_config": {
          "parallel_workers": 32,
          "use_my_sql_dependency": false,
          "dependency_history_size": 2500,
          "bulk_insert1": 0,
          "bulk_insert2": 0,
          "set_gtid_next": false
        }
      }
    },
    "replicate_do_db": [
      {
        "table_schema": "action_db",
        "table_schema_regex": "",
        "table_schema_rename": "",
        "tables": [
          {
            "table_name": "tb_1",
            "table_regex": "",
            "table_rename": "",
            "column_map_from": null,
            "column_map_to": null,
            "where": ""
          }
        ]
      }
    ],
    "replicate_ignore_db": []
  },
  "task_logs": [
    {
      "task_events": [
        {
          "event_type": "Received",
          "setup_error": "",
          "message": "Task received by client",
          "time": "2022-12-07T17:30:29+08:00"
        },
        {
          "event_type": "Task Setup",
          "setup_error": "",
          "message": "Building Task Directory",
          "time": "2022-12-07T17:30:29+08:00"
        },
        {
          "event_type": "Started",
          "setup_error": "",
          "message": "Task started by client",
          "time": "2022-12-07T17:30:29+08:00"
        }
      ],
      "node_id": "2f3e31ed-09b3-1c1f-a348-e2ac4963f70a",
      "allocation_id": "b6cb500e-b032-2dda-2a50-76ba2bf746f4",
      "address": "172.100.9.13",
      "target": "src",
      "status": "run"
    },
    {
      "task_events": [
        {
          "event_type": "Received",
          "setup_error": "",
          "message": "Task received by client",
          "time": "2022-12-07T17:30:29+08:00"
        },
        {
          "event_type": "Task Setup",
          "setup_error": "",
          "message": "Building Task Directory",
          "time": "2022-12-07T17:30:29+08:00"
        },
        {
          "event_type": "Started",
          "setup_error": "",
          "message": "Task started by client",
          "time": "2022-12-07T17:30:29+08:00"
        }
      ],
      "node_id": "2f3e31ed-09b3-1c1f-a348-e2ac4963f70a",
      "allocation_id": "b6cb500e-b032-2dda-2a50-76ba2bf746f4",
      "address": "172.100.9.13",
      "target": "dest",
      "status": "run"
    }
  ],
  "message": "ok"
}
  3. Check the job detail in the Nomad UI; it matches the settings used to create the job.
    image

Output of ./dtle version:

9.9.9.9-master-19fd66b

Additional information

(e.g. issue happens only occasionally)

Additional details (log, config, job config etc):

问题: DestConfig移动到SrcConfig的变更中, API相关代码变更错误.

2 在consul上储存API所需信息

现状

  • nomad层面的数据, 发送到任意nomad agent都是一样的. 无需转发.
  • dtle层面的数据, 只有运行task的dtle节点才有. 需要转发.
  • 向nomad查询allocations, 确定task执行的nomad节点ip
  • 将该ip与当前dtle配置ip比较

问题

  • 最开始使用配置项NomadHost, 但该配置实际可以指向其他Nomad节点
    • 考虑nomad server/client分离的情况
  • 后来改用ApiAddr, 但该配置可能为0.0.0.0:8190

变更

  1. 简化信息
  2. consul上储存
  3. 避免转发

原来使用到转发的API /v2/monitor/task, 目前没人用. 暂时改成返回dummy data. 考虑更改成从各dtle节点的/metrics收集信息.

bfa981c: 将全量进度DumpProgress保存在consul/kv/dtle/jobName/DumpProgress.

  • 目前格式为json[exec, total]
  • 由于consul kv有size limit (512KB), 无法储存各表数据
  • 各表单独显示exec/total需要另寻方案