googleapis/python-bigtable

tests.system.test_instance_admin: test_instance_create_w_two_clusters failed

flaky-bot opened this issue · 2 comments

Note: #454 was also for this test, but it was closed more than 10 days ago. So, I didn't mark it flaky.


commit: e30a34b
buildURL: Build Status, Sponge
status: failed

Test output
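target = functools.partial(<bound method PollingFuture._done_or_raise of <google.api_core.operation.Operation object at 0x7f98b838c0a0>>)
predicate = <function if_exception_type.<locals>.if_exception_type_predicate at 0x7f98ba7ff9d0>
sleep_generator = <generator object exponential_sleep_generator at 0x7f98b83816d0>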
deadline = 120, on_error = None
def retry_target(target, predicate, sleep_generator, deadline, on_error=None):
    """Call a function and retry if it fails.

    This is the lowest-level retry helper. Generally, you'll use the
    higher-level retry helper :class:`Retry`.

    Args:
        target(Callable): The function to call and retry. This must be a
            nullary function - apply arguments with `functools.partial`.
        predicate (Callable[Exception]): A callable used to determine if an
            exception raised by the target should be considered retryable.
            It should return True to retry or False otherwise.
        sleep_generator (Iterable[float]): An infinite iterator that determines
            how long to sleep between retries.
        deadline (float): How long to keep retrying the target. The last sleep
            period is shortened as necessary, so that the last retry runs at
            ``deadline`` (and not considerably beyond it).
        on_error (Callable[Exception]): A function to call while processing a
            retryable exception.  Any error raised by this function will *not*
            be caught.

    Returns:
        Any: the return value of the target function.

    Raises:
        google.api_core.RetryError: If the deadline is exceeded while retrying.
        ValueError: If the sleep generator stops yielding values.
        Exception: If the target raises a method that isn't retryable.
    """
    if deadline is not None:
        deadline_datetime = datetime_helpers.utcnow() + datetime.timedelta(
            seconds=deadline
        )
    else:
        deadline_datetime = None

    last_exc = None

    for sleep in sleep_generator:
        try:
          return target()

.nox/system-3-8/lib/python3.8/site-packages/google/api_core/retry.py:190:


self = <google.api_core.operation.Operation object at 0x7f98b838c0a0>
retry = <google.api_core.retry.Retry object at 0x7f98ba804250>

def _done_or_raise(self, retry=DEFAULT_RETRY):
    """Check if the future is done and raise if it's not."""
    kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}

    if not self.done(**kwargs):
      raise _OperationNotComplete()

E google.api_core.future.polling._OperationNotComplete

.nox/system-3-8/lib/python3.8/site-packages/google/api_core/future/polling.py:89: _OperationNotComplete

The above exception was the direct cause of the following exception:

self = <google.api_core.operation.Operation object at 0x7f98b838c0a0>
timeout = 120, retry = <google.api_core.retry.Retry object at 0x7f98ba804250>

def _blocking_poll(self, timeout=None, retry=DEFAULT_RETRY):
    """Poll and wait for the Future to be resolved.

    Args:
        timeout (int):
            How long (in seconds) to wait for the operation to complete.
            If None, wait indefinitely.
    """
    if self._result_set:
        return

    retry_ = self._retry.with_deadline(timeout)

    try:
        kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}
      retry_(self._done_or_raise)(**kwargs)

.nox/system-3-8/lib/python3.8/site-packages/google/api_core/future/polling.py:110:


args = (), kwargs = {}
target = functools.partial(<bound method PollingFuture._done_or_raise of <google.api_core.operation.Operation object at 0x7f98b838c0a0>>)
sleep_generator = <generator object exponential_sleep_generator at 0x7f98b83816d0>

@functools.wraps(func)
def retry_wrapped_func(*args, **kwargs):
    """A wrapper that calls target function with retry."""
    target = functools.partial(func, *args, **kwargs)
    sleep_generator = exponential_sleep_generator(
        self._initial, self._maximum, multiplier=self._multiplier
    )
  return retry_target(
        target,
        self._predicate,
        sleep_generator,
        self._deadline,
        on_error=on_error,
    )

.nox/system-3-8/lib/python3.8/site-packages/google/api_core/retry.py:283:


target = functools.partial(<bound method PollingFuture._done_or_raise of <google.api_core.operation.Operation object at 0x7f98b838c0a0>>)
predicate = <function if_exception_type.<locals>.if_exception_type_predicate at 0x7f98ba7ff9d0>
sleep_generator = <generator object exponential_sleep_generator at 0x7f98b83816d0>
deadline = 120, on_error = None

def retry_target(target, predicate, sleep_generator, deadline, on_error=None):
    """Call a function and retry if it fails.

    This is the lowest-level retry helper. Generally, you'll use the
    higher-level retry helper :class:`Retry`.

    Args:
        target(Callable): The function to call and retry. This must be a
            nullary function - apply arguments with `functools.partial`.
        predicate (Callable[Exception]): A callable used to determine if an
            exception raised by the target should be considered retryable.
            It should return True to retry or False otherwise.
        sleep_generator (Iterable[float]): An infinite iterator that determines
            how long to sleep between retries.
        deadline (float): How long to keep retrying the target. The last sleep
            period is shortened as necessary, so that the last retry runs at
            ``deadline`` (and not considerably beyond it).
        on_error (Callable[Exception]): A function to call while processing a
            retryable exception.  Any error raised by this function will *not*
            be caught.

    Returns:
        Any: the return value of the target function.

    Raises:
        google.api_core.RetryError: If the deadline is exceeded while retrying.
        ValueError: If the sleep generator stops yielding values.
        Exception: If the target raises a method that isn't retryable.
    """
    if deadline is not None:
        deadline_datetime = datetime_helpers.utcnow() + datetime.timedelta(
            seconds=deadline
        )
    else:
        deadline_datetime = None

    last_exc = None

    for sleep in sleep_generator:
        try:
            return target()

        # pylint: disable=broad-except
        # This function explicitly must deal with broad exceptions.
        except Exception as exc:
            if not predicate(exc):
                raise
            last_exc = exc
            if on_error is not None:
                on_error(exc)

        now = datetime_helpers.utcnow()

        if deadline_datetime is not None:
            if deadline_datetime <= now:
              raise exceptions.RetryError(
                    "Deadline of {:.1f}s exceeded while calling target function".format(
                        deadline
                    ),
                    last_exc,
                ) from last_exc

E google.api_core.exceptions.RetryError: Deadline of 120.0s exceeded while calling target function, last exception:

.nox/system-3-8/lib/python3.8/site-packages/google/api_core/retry.py:205: RetryError

During handling of the above exception, another exception occurred:

admin_client = <google.cloud.bigtable.client.Client object at 0x7f98ba54bcd0>
unique_suffix = '-1661046899162'
admin_instance_populated = <google.cloud.bigtable.instance.Instance object at 0x7f98b83d96d0>
admin_cluster = <google.cloud.bigtable.cluster.Cluster object at 0x7f98b8c8e790>
location_id = 'us-central1-c'
instance_labels = {'python-system': '2022-08-21t01-54-59'}
instances_to_delete = [<google.cloud.bigtable.instance.Instance object at 0x7f98b8384ee0>]
skip_on_emulator = None

def test_instance_create_w_two_clusters(
    admin_client,
    unique_suffix,
    admin_instance_populated,
    admin_cluster,
    location_id,
    instance_labels,
    instances_to_delete,
    skip_on_emulator,
):
    alt_instance_id = f"dif{unique_suffix}"
    instance = admin_client.instance(
        alt_instance_id,
        instance_type=enums.Instance.Type.PRODUCTION,
        labels=instance_labels,
    )

    serve_nodes = 1

    alt_cluster_id_1 = f"{alt_instance_id}-c1"
    cluster_1 = instance.cluster(
        alt_cluster_id_1,
        location_id=location_id,
        serve_nodes=serve_nodes,
        default_storage_type=enums.StorageType.HDD,
    )

    alt_cluster_id_2 = f"{alt_instance_id}-c2"
    location_id_2 = "us-central1-f"
    cluster_2 = instance.cluster(
        alt_cluster_id_2,
        location_id=location_id_2,
        serve_nodes=serve_nodes,
        default_storage_type=enums.StorageType.HDD,
    )
    operation = instance.create(clusters=[cluster_1, cluster_2])
    instances_to_delete.append(instance)
  operation.result(timeout=120)  # Ensure the operation completes.

tests/system/test_instance_admin.py:247:


.nox/system-3-8/lib/python3.8/site-packages/google/api_core/future/polling.py:132: in result
self._blocking_poll(timeout=timeout, **kwargs)


self = <google.api_core.operation.Operation object at 0x7f98b838c0a0>
timeout = 120, retry = <google.api_core.retry.Retry object at 0x7f98ba804250>

def _blocking_poll(self, timeout=None, retry=DEFAULT_RETRY):
    """Poll and wait for the Future to be resolved.

    Args:
        timeout (int):
            How long (in seconds) to wait for the operation to complete.
            If None, wait indefinitely.
    """
    if self._result_set:
        return

    retry_ = self._retry.with_deadline(timeout)

    try:
        kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}
        retry_(self._done_or_raise)(**kwargs)
    except exceptions.RetryError:
      raise concurrent.futures.TimeoutError(
            "Operation did not complete within the designated " "timeout."
        )

E concurrent.futures._base.TimeoutError: Operation did not complete within the designated timeout.

.nox/system-3-8/lib/python3.8/site-packages/google/api_core/future/polling.py:112: TimeoutError

Looks like this issue is flaky. 😟

I'm going to leave this open and stop commenting.

A human should fix and close this.


When run at the same commit (e30a34b), this test passed in one build (Build Status, Sponge) and failed in another build (Build Status, Sponge).

Last failed test was Oct 28.