facebook/mcrouter

AsyncSocketException: AsyncMcClient, type = Socket not open

anuphalarnkar opened this issue · 1 comment

Hi,

I am running a mcrouter container with 3 memcached instances in a cluster...
I am running into an error when I try to run a workload via a LoadBalancer...

Any hints would be helpful. Thanks in advance. Attached logs below.

I0124 10:22:47.529297 1 StandaloneUtils.cpp:479] .0 mcrouter startup (1)
I0124 10:22:47.529446 1 StandaloneUtils.cpp:192] Starting Memcache router
I0124 10:22:47.529476 1 Server-inl.h:496] Spawning AsyncMcServer
I0124 10:22:47.530237 1 FunctionScheduler.cpp:397] Starting FunctionScheduler with 0 functions.
I0124 10:22:47.530498 1 CarbonRouterInstance-inl.h:628] creating config builder
I0124 10:22:47.531168 1 CarbonRouterInstance-inl.h:598] started reconfiguring
I0124 10:22:47.531427 1 CarbonRouterInstance-inl.h:616] reconfigured 1 proxies with 1 pools, 3 clients 6423f6330c93e5d05cbdf195daf66b7b)
W0124 10:22:47.531690 1 Server-inl.h:56] acl checker will not be enabled.

FAILURE 1674556123.762147 1 [AsyncMcClient] [bad-environment] [IOThreadPool0] network/AsyncMcClientImpl.cpp:362] AsyncSocketException: AsyncMcClient, type = Socket not open
"libmcrouter.mcrouter.5000":
{
"allow_only_gets":"0",
"async_spool":"/var/spool/mcrouter",
"asynclog_disable":"0",
"asynclog_port_override":"0",
"big_value_batch_size":"10",
"big_value_split_threshold":"0",
"client_queue_no_notify_rate":"0",
"client_queue_size":"1024",
"client_queue_wait_threshold_us":"0",
"collect_rxmit_stats_every_hz":"0",
"config":"",
"config_dump_root":"/var/mcrouter/config",
"config_file":"/etc/mcrouter/config.json",
"config_params":"",
"config_str":"",
"connect_timeout_retries":"0",
"constantly_reload_configs":"0",
"cross_cluster_timeout_ms":"0",
"cross_region_timeout_ms":"0",
"debug_fifo_root":"/var/mcrouter/fifos",
"default_qos_class":"0",
"default_qos_path":"0",
"default_route":"/././",
"disable_miss_on_arith_errors":"0",
"disable_reload_configs":"0",
"disable_request_deadline_check":"0",
"disable_shard_split_route":"0",
"disable_tko_tracking":"0",
"enable_axonlog":"0",
"enable_compression":"0",
"enable_failure_logging":"1",
"enable_flush_cmd":"0",
"enable_logging_route":"0",
"enable_partial_reconfigure":"0",
"enable_qos":"0",
"enable_send_to_main_shard_split":"1",
"enable_service_router":"0",
"enable_ssl_tfo":"0",
"external_carbon_connection_log_max_burst":"500",
"external_carbon_connection_log_rate_per_hour":"3600",
"external_carbon_connection_log_sample_rate":"10000",
"external_carbon_connection_logging_enabled":"0",
"failures_until_tko":"3",
"fibers_max_pool_size":"1000",
"fibers_pool_resize_period_ms":"60000",
"fibers_record_stack_size_every":"100000",
"fibers_stack_size":"24576",
"fibers_use_guard_pages":"1",
"file_observer_poll_period_ms":"100",
"file_observer_sleep_before_update_ms":"1000",
"flavor_name":"unknown",
"group_remote_errors":"0",
"jemalloc_nodump_buffers":"0",
"keepalive_cnt":"0",
"keepalive_idle_s":"300",
"keepalive_interval_s":"60",
"logging_rtt_outlier_threshold_us":"0",
"max_dumped_config_age":"43200",
"max_no_flush_event_loops":"5",
"max_rxmit_reconnect_threshold":"0",
"max_shadow_token_map_size":"1024",
"min_rxmit_reconnect_threshold":"0",
"miss_on_get_errors":"1",
"num_proxies":"1",
"pem_ca_path":"",
"pem_cert_path":"",
"pem_key_path":"",
"pool_stats_config_file":"",
"post_reconfiguration_delay_ms":"0",
"probe_delay_initial_ms":"10000",
"probe_delay_max_ms":"60000",
"proxy_max_inflight_requests":"0",
"proxy_max_inflight_shadow_requests":"0",
"proxy_max_throttled_requests":"0",
"reconfiguration_delay_ms":"1000",
"reconfiguration_jitter_ms":"0",
"reset_inactive_connection_interval":"60000",
"router_name":"5000",
"runtime_vars_file":"",
"rxmit_latency_deviation_us":"0",
"send_invalid_route_to_default":"0",
"server_timeout_ms":"1000",
"service_name":"mcrouter",
"ssl_connection_cache":"0",
"ssl_handshake_offload":"0",
"ssl_service_identity":"",
"ssl_service_identity_authorization_enforce":"0",
"ssl_service_identity_authorization_log":"0",
"ssl_verify_peers":"0",
"stats_async_queue_length":"50",
"stats_logging_interval":"10000",
"stats_root":"/var/mcrouter/stats",
"target_max_inflight_requests":"0",
"target_max_pending_requests":"100000",
"target_max_shadow_requests":"1000",
"tcp_rto_min":"-1",
"test_mode":"0",
"thread_affinity":"0",
"thrift_compression_threshold":"0",
"tls_prefer_ocb_cipher":"0",
"use_asynclog_version2":"0",
"version":".0 mcrouter",
"waiting_request_timeout_ms":"0",
"within_cluster_timeout_ms":"0"
}

This is the command:
mcrouter -p 5000 --config-file=/etc/mcrouter/config.json

Contents of config.json:

{
"pools": {
"A": {
"servers": [
// hosts of replicated pool, https://github.com/facebook/mcrouter/wiki/Replicated-pools-setup e.g.:
"social-network-0.social-network.social-network.svc.cluster.local:11211",
"social-network-1.social-network.social-network.svc.cluster.local:11211",
"social-network-2.social-network.social-network.svc.cluster.local:11211",
]
}
},
"route": {
"type": "OperationSelectorRoute",
"default_policy": "PoolRoute|A",
"operation_policies": {
"add": "AllFastestRoute|Pool|A",
"delete": "AllFastestRoute|Pool|A",
"get": "LatestRoute|Pool|A",
"set": "AllFastestRoute|Pool|A"
}
}
}

Hello,

This is a continuation of the previous issue. I used mcpiper to capture debug logs inside the mcrouter container. The logs are attached below.
I can see that the logs do contain the message "mc_res_remote_error".

I also compared this against an older version of mcrouter on Ubuntu 16.04, where I get different messages such as "mc_res_notfound" and "mc_res_stored", but not "mc_res_remote_error".

Any input on why the error occurs on arm64 but not on x86 would be greatly appreciated.

Thanks in advance