grpc/grpc-dotnet

Is it possible to use gRPC Client-side load balancing with a custom dns server?

kharabasz opened this issue · 5 comments

Is it possible to use gRPC client-side load balancing with a custom DNS server, like the one bundled with Consul? Unless I missed something in the code and docs, it looks like the current implementation uses the system's DNS server configuration when performing lookups.

The address used with DNS is configurable. Can you create a GrpcChannel with DNS + Consul's DNS address? (I'm not familiar with using DNS with Consul.)

What DNS URI would correspond to the following lookup?

dig @consul-dns-server-address -p 8600 service-to-call.service.consul SRV

Reading through more of the dotnet documentation, I think the answer is no.

The Dns.* methods seem to rely on the internal Win32 APIs which in turn go through the DNS servers associated with the network connection. In order to get this to work, I would have to change the DNS servers associated with the network adapter's address - which I don't want to do.

I will try writing a custom DnsResolver against a package like DnsClient.NET

Using DnsClient.NET I put together a simple ConsulDnsResolver (code below).

This worked in our development environment until instances were reallocated, which did not trigger a refresh of the resolver. The error being logged is:

Grpc.Core.RpcException: Status(
    StatusCode="Unavailable",
    Detail="Error connecting to subchannel.",
    DebugException="System.Net.Sockets.SocketException: Connection refused"
)
       System.Net.Sockets.SocketException (111): Connection refused
       at System.Net.Sockets.Socket.AwaitableSocketAsyncEventArgs.ThrowException(SocketError error, CancellationToken cancellationToken)
       at System.Net.Sockets.Socket.AwaitableSocketAsyncEventArgs.System.Threading.Tasks.Sources.IValueTaskSource.GetResult(Int16 token)
       at System.Net.Sockets.Socket.<ConnectAsync>g__WaitForConnectWithCancellation|277_0(AwaitableSocketAsyncEventArgs saea, ValueTask connectTask, CancellationToken cancellationToken)
       at Grpc.Net.Client.Balancer.Internal.SocketConnectivitySubchannelTransport.TryConnectAsync(ConnectContext context)
       --- End of inner exception stack trace ---
       at Grpc.Net.Client.Balancer.Internal.ConnectionManager.PickAsync(PickContext context, Boolean waitForReady, CancellationToken cancellationToken)
       at Grpc.Net.Client.Balancer.Internal.BalancerHttpHandler.SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
       at Grpc.Net.Client.Internal.GrpcCall`2.RunCall(HttpRequestMessage request, Nullable`1 timeout)
       at Grpc.Net.Client.Internal.Retry.RetryCallBase`2.GetResponseCoreAsync()
       at (domain code)

@JamesNK Off the top of your head - do you see anything glaringly wrong or missing from this code?

/// <summary>
/// Registers the DNS lookup client, the "consul" resolver factory and the
/// <c>ServiceToCallClient</c> gRPC client (with TLS credentials and a default
/// retry policy) in the service collection.
/// </summary>
/// <param name="services">The service collection to add the registrations to.</param>
/// <param name="configuration">Application configuration. Not read by this method.</param>
/// <returns>The same <paramref name="services"/> instance, to allow chaining.</returns>
private static IServiceCollection RegisterGrpcClient(this IServiceCollection services, IConfiguration configuration)
{
    // A new lookup client is created on every resolution of ILookupClient.
    services.AddTransient<ILookupClient, LookupClient>(_ => CreateLookupClient());

    // The resolver factory is a singleton and captures one lookup client.
    services.AddSingleton<ResolverFactory>(
        provider => new ConsulDnsResolverFactory(provider.GetRequiredService<ILookupClient>()));

    services.AddGrpcClient<ServiceToCallClient>(clientOptions =>
    {
        // The "consul" scheme matches ConsulDnsResolverFactory.Name.
        clientOptions.Address = new Uri("consul://service-to-call.service.consul");
        clientOptions.ChannelOptionsActions.Add(ConfigureChannel);
    });

    return services;

    // Builds the DNS client that targets 127.0.0.1 on port 8600.
    static LookupClient CreateLookupClient()
    {
        var dnsServerAddress = IPAddress.Parse("127.0.0.1");
        return new LookupClient(dnsServerAddress, 8600);
    }

    // Applies TLS credentials and the retry service config to a channel.
    static void ConfigureChannel(GrpcChannelOptions channelOptions)
    {
        channelOptions.Credentials = ChannelCredentials.SecureSsl;
        channelOptions.ServiceConfig = CreateServiceConfig();
    }

    // Builds a service config with a single retry policy applied to all methods.
    static ServiceConfig CreateServiceConfig()
    {
        var retryPolicy = new RetryPolicy
        {
            MaxAttempts = 3,
            InitialBackoff = TimeSpan.FromSeconds(1),
            MaxBackoff = TimeSpan.FromSeconds(5),
            BackoffMultiplier = 1.5
        };
        retryPolicy.RetryableStatusCodes.Add(StatusCode.Unavailable);
        retryPolicy.RetryableStatusCodes.Add(StatusCode.Internal);
        retryPolicy.RetryableStatusCodes.Add(StatusCode.Unknown);
        retryPolicy.RetryableStatusCodes.Add(StatusCode.Cancelled);
        retryPolicy.RetryableStatusCodes.Add(StatusCode.Aborted);

        var defaultMethodConfig = new MethodConfig();
        defaultMethodConfig.Names.Add(MethodName.Default);
        defaultMethodConfig.RetryPolicy = retryPolicy;

        var serviceConfig = new ServiceConfig();
        serviceConfig.MethodConfigs.Add(defaultMethodConfig);
        return serviceConfig;
    }
}

/// <summary>
/// A <see cref="PollingResolver"/> that resolves a service name to balancer
/// addresses by issuing an SRV query through an <see cref="ILookupClient"/>
/// and pairing each SRV record with the A records returned in the
/// additional section of the same response.
/// </summary>
internal class ConsulDnsResolver : PollingResolver
{
    private readonly ILookupClient _lookupClient;
    private readonly Uri _address;

    /// <summary>
    /// Creates a resolver for the given channel address.
    /// </summary>
    /// <param name="address">Channel address; its host is used as the SRV query name.</param>
    /// <param name="loggerFactory">Logger factory passed to the base resolver.</param>
    /// <param name="lookupClient">DNS client used to run the SRV query.</param>
    public ConsulDnsResolver(Uri address, ILoggerFactory loggerFactory, ILookupClient lookupClient)
        : base(loggerFactory)
    {
        _address = address;
        _lookupClient = lookupClient;
    }

    /// <summary>
    /// Queries SRV records for the address host and reports the outcome through
    /// <c>Listener</c>: a result with the matched addresses, a
    /// <see cref="StatusCode.NotFound"/> failure when nothing matched, or a
    /// <see cref="StatusCode.Unknown"/> failure when the query throws.
    /// </summary>
    /// <param name="cancellationToken">Token that cancels the DNS query.</param>
    /// <exception cref="OperationCanceledException">
    /// Propagated when <paramref name="cancellationToken"/> has been cancelled.
    /// </exception>
    protected override async Task ResolveAsync(CancellationToken cancellationToken)
    {
        try
        {
            var result = await _lookupClient.QueryAsync(
                _address.Host,
                QueryType.SRV,
                cancellationToken: cancellationToken);

            var balancerAddresses = new List<BalancerAddress>();
            foreach (var dnsResourceRecord in result.Answers)
            {
                if (dnsResourceRecord is not SrvRecord srvRecord)
                {
                    continue;
                }

                // The SRV record carries only the target name and port; the IP
                // address comes from the A record with the same name in the
                // additional section.
                foreach (var additionalResourceRecord in result.Additionals)
                {
                    if (additionalResourceRecord is not ARecord aRecord)
                    {
                        continue;
                    }

                    if (aRecord.DomainName.Equals(srvRecord.Target))
                    {
                        balancerAddresses.Add(new BalancerAddress(aRecord.Address.ToString(), srvRecord.Port));
                    }
                }
            }

            if (balancerAddresses.Count == 0)
            {
                Listener(ResolverResult.ForFailure(new Status(StatusCode.NotFound, "No Consul DNS records found")));
            }
            else
            {
                Listener(ResolverResult.ForResult(balancerAddresses));
            }
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation is not a resolution failure, so it must not be
            // reported to the channel as an Unknown status. Let it propagate.
            // NOTE(review): PollingResolver is expected to treat this as a stop
            // signal - confirm against the grpc-dotnet version in use.
            throw;
        }
        catch (Exception e)
        {
            Listener(ResolverResult.ForFailure(new Status(StatusCode.Unknown, e.Message, e)));
        }
    }
}

/// <summary>
/// Resolver factory for the "consul" address scheme. Each created resolver
/// shares the lookup client supplied to this factory.
/// </summary>
internal class ConsulDnsResolverFactory : ResolverFactory
{
    private readonly ILookupClient _dnsClient;

    /// <summary>
    /// Creates the factory.
    /// </summary>
    /// <param name="lookupClient">DNS client handed to every resolver this factory creates.</param>
    public ConsulDnsResolverFactory(ILookupClient lookupClient) => _dnsClient = lookupClient;

    /// <summary>
    /// The scheme name this factory is registered under.
    /// </summary>
    public override string Name
    {
        get { return "consul"; }
    }

    /// <summary>
    /// Builds a <see cref="ConsulDnsResolver"/> from the address and logger
    /// factory in <paramref name="options"/>.
    /// </summary>
    /// <param name="options">Resolver options provided by the channel.</param>
    /// <returns>A new resolver instance.</returns>
    public override Resolver Create(ResolverOptions options) =>
        new ConsulDnsResolver(options.Address, options.LoggerFactory, _dnsClient);
}

It looks ok to me.