BehaviorTree/BehaviorTree.ROS2

Error when canceling action during halt()

Opened this issue · 8 comments

I'm running in a weird behavior when trying to halting an action node.

Setup:
BehaviorTree.ROS2 branch humble bb0d510
BehaviorTree.CPP branch master acbc707

Structure:
ws
|- BehaviorTree.ROS2
|- BehaviorTree.CPP
|- test
|- CMakeLists.txt
|- package.xml
|- src
|- bt_act_test.cpp

CMakeLists.txt:

cmake_minimum_required(VERSION 3.8)
project(bt_act_test)

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    add_compile_options(-Wall -Wextra -Wpedantic)
endif()

# find dependencies
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED )
find_package(rclcpp_action REQUIRED )
find_package(behaviortree_cpp REQUIRED )
find_package(behaviortree_ros2 REQUIRED )
find_package(example_interfaces REQUIRED)

add_executable(bt_act_test src/bt_act_test.cpp)
target_include_directories(bt_act_test PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:include>
)
ament_target_dependencies(bt_act_test
    rclcpp
    rclcpp_action
    behaviortree_cpp
    behaviortree_ros2
    example_interfaces
)
    

if(BUILD_TESTING)
    find_package(ament_lint_auto REQUIRED)
    # the following line skips the linter which checks for copyrights
    # comment the line when a copyright and license is added to all source files
    set(ament_cmake_copyright_FOUND TRUE)
    # the following line skips cpplint (only works in a git repo)
    # comment the line when this package is in a git repo and when
    # a copyright and license is added to all source files
    set(ament_cmake_cpplint_FOUND TRUE)
    ament_lint_auto_find_test_dependencies()
endif()

install(TARGETS 
    bt_act_test
DESTINATION lib/${PROJECT_NAME})    

ament_package()

package.xml:

...
    <buildtool_depend>ament_cmake</buildtool_depend>

    <depend>rclcpp</depend>
    <depend>rclcpp_action</depend>
    <depend>behaviortree_ros2</depend>
    <depend>behaviortree_cpp</depend>
    <depend>example_interfaces</depend>

    <test_depend>ament_lint_auto</test_depend>
    <test_depend>ament_lint_common</test_depend>

    <export>
      <build_type>ament_cmake</build_type>
    </export>
...

bt_act_test.cpp:

#include <stdio.h>
#include <string>
#include <chrono>
#include <thread>
#include <memory>
#include <filesystem>
#include <vector>

#include <example_interfaces/action/fibonacci.hpp>
#include <rclcpp/rclcpp.hpp>
#include <rclcpp_action/rclcpp_action.hpp>

#include <behaviortree_ros2/bt_action_node.hpp>

using vec_int = std::vector<int32_t>;
using Fibonacci = example_interfaces::action::Fibonacci;
using GoalHandleFibonacci = rclcpp_action::ServerGoalHandle<Fibonacci>;



void sleep(int ms=10){
    std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}


class FibonacciActionServer : public rclcpp::Node {  

public:

    explicit FibonacciActionServer(const rclcpp::NodeOptions & options = rclcpp::NodeOptions())
    : Node("server_node", options)
    {
        using namespace std::placeholders;

        this->action_server_ = rclcpp_action::create_server<Fibonacci>(
            this,
            "fibonacci",
            std::bind(&FibonacciActionServer::handle_goal, this, _1, _2),
            std::bind(&FibonacciActionServer::handle_cancel, this, _1),
            std::bind(&FibonacciActionServer::handle_accepted, this, _1)    
        );
    }

private:
    rclcpp_action::Server<Fibonacci>::SharedPtr action_server_;

    rclcpp_action::GoalResponse handle_goal(
        const rclcpp_action::GoalUUID & uuid,
        std::shared_ptr<const Fibonacci::Goal> goal)
    {
        RCLCPP_INFO(this->get_logger(), "Received goal request with order %d", goal->order);
        (void)uuid;
        return rclcpp_action::GoalResponse::ACCEPT_AND_EXECUTE;
    }

    rclcpp_action::CancelResponse handle_cancel(
        const std::shared_ptr<GoalHandleFibonacci> goal_handle)
    {
        RCLCPP_INFO(this->get_logger(), "Received request to cancel goal");
        (void)goal_handle;
        return rclcpp_action::CancelResponse::ACCEPT;
    }

    void handle_accepted(const std::shared_ptr<GoalHandleFibonacci> goal_handle)
    {
        using namespace std::placeholders;
        // this needs to return quickly to avoid blocking the executor, so spin up a new thread
        std::thread{std::bind(&FibonacciActionServer::execute, this, _1), goal_handle}.detach();
    }

    void execute(const std::shared_ptr<GoalHandleFibonacci> goal_handle)
    {
        RCLCPP_INFO(this->get_logger(), "Executing goal");
        rclcpp::Rate loop_rate(1);
        const auto goal = goal_handle->get_goal();
        auto feedback = std::make_shared<Fibonacci::Feedback>();
        auto & sequence = feedback->sequence;
        sequence.push_back(0);
        sequence.push_back(1);
        auto result = std::make_shared<Fibonacci::Result>();

        for (int i = 1; (i < goal->order) && rclcpp::ok(); ++i) {
            // Check if there is a cancel request
            if (goal_handle->is_canceling()) {
                result->sequence = sequence;
                goal_handle->canceled(result);
                RCLCPP_INFO(this->get_logger(), "Goal canceled");
                return;
            }
            // Update sequence
            sequence.push_back(sequence[i] + sequence[i - 1]);
            // Publish feedback
            goal_handle->publish_feedback(feedback);
            RCLCPP_INFO(this->get_logger(), "Publish feedback");

            loop_rate.sleep();
        }

        // Check if goal is done
        if (rclcpp::ok()) {
            result->sequence = sequence;
            goal_handle->succeed(result);
            RCLCPP_INFO(this->get_logger(), "Goal succeeded");
        }
    }
};

using namespace BT;

class FibonacciAction: public RosActionNode<Fibonacci>
{
public:
  FibonacciAction(const std::string& name,
                  const NodeConfig& conf,
                  const RosNodeParams& params)
    : RosActionNode<Fibonacci>(name, conf, params)
  {}

  // The specific ports of this Derived class
  // should be merged with the ports of the base class,
  // using RosActionNode::providedBasicPorts()
  static PortsList providedPorts()
  {
    return providedBasicPorts({InputPort<unsigned>("order"), OutputPort<vec_int>("result")});
  }

  // This is called when the TreeNode is ticked and it should
  // send the request to the action server
  bool setGoal(RosActionNode::Goal& goal) override 
  {
    // get "order" from the Input port
    getInput("order", goal.order);
    // return true, if we were able to set the goal correctly.
    return true;
  }
  
  // Callback executed when the reply is received.
  // Based on the reply you may decide to return SUCCESS or FAILURE.
  NodeStatus onResultReceived(const WrappedResult& wr) override
  {
    std::stringstream ss;
    ss << "Result received: ";
    for (auto number : wr.result->sequence) {
      ss << number << " ";
    }
    RCLCPP_INFO(node_->get_logger(), ss.str().c_str());
    this->setOutput<vec_int>("result", wr.result->sequence);
    return NodeStatus::SUCCESS;
  }

  // Callback invoked when there was an error at the level
  // of the communication between client and server.
  // This will set the status of the TreeNode to either SUCCESS or FAILURE,
  // based on the return value.
  // If not overridden, it will return FAILURE by default.
  virtual NodeStatus onFailure(ActionNodeErrorCode error) override
  {
    RCLCPP_ERROR(node_->get_logger(), "Error: %d", error);
    return NodeStatus::FAILURE;
  }

  // we also support a callback for the feedback, as in
  // the original tutorial.
  // Usually, this callback should return RUNNING, but you
  // might decide, based on the value of the feedback, to abort
  // the action, and consider the TreeNode completed.
  // In that case, return SUCCESS or FAILURE.
  // The Cancel request will be send automatically to the server.
  NodeStatus onFeedback(const std::shared_ptr<const Feedback> feedback)
  {
    std::stringstream ss;
    ss << "Next number in sequence received: ";
    for (auto number : feedback->sequence) {
      ss << number << " ";
    }
    RCLCPP_INFO(node_->get_logger(), ss.str().c_str());
    this->setOutput<vec_int>("result", feedback->sequence);
    return NodeStatus::RUNNING;
  }
};

template<typename T>
class GetBBvalueNode : public BT::SyncActionNode{
public:
    GetBBvalueNode (
        const std::string& name, const BT::NodeConfig& config, 
        std::function<BT::NodeStatus(T)> callback,
        std::function<BT::NodeStatus(BT::Expected<T>)> no_value_callback = [](BT::Expected<T> input){
            throw BT::RuntimeError("missing required input [input]: ", input.error() );
            return BT::NodeStatus::FAILURE; //needed to pass contructor checks
        }
    ) : 
    BT::SyncActionNode(name, config),
    _callback(callback),
    _no_value_callback(no_value_callback) {}

    BT::NodeStatus tick() override{
        auto input = getInput<T>("input"); //get value from port "input"

        // Check if expected is valid. If not, throw its error
        if (!input) return this->_no_value_callback(input);
        else return this->_callback(input.value());
    }

    /**
     * @brief set port input as <T> input
    */
    static BT::PortsList providedPorts(){
        return{ BT::InputPort<T>("input", "read value")};
    }

private:
    std::function<BT::NodeStatus(T)> _callback;
    std::function<BT::NodeStatus(BT::Expected<T>)> _no_value_callback;
};

int main(int argc, char *argv[]){
    rclcpp::init(argc, argv);

    static const char* xml_text = R"(
    <root BTCPP_format="4" >
        <BehaviorTree ID="MainTree">
            <ReactiveSequence>
                <GetVec input="{res}"/>
                <Fibonacci  order="5"
                            result="{res}"/>
            </ReactiveSequence>
        </BehaviorTree>
    </root>
    )";
    
    std::vector<vec_int> res_vecs;

    auto get_vec = [&res_vecs](vec_int input){
        if ( std::find(res_vecs.begin(), res_vecs.end(), input) == res_vecs.end() ){ 
            res_vecs.push_back(input);
            for(auto el : res_vecs.back()) std::cout << el << "; ";
            std::cout << std::endl;
        } 
        if (res_vecs.size() == 2) return BT::NodeStatus::FAILURE;
        else return BT::NodeStatus::SUCCESS;
    };
    auto on_err = [](BT::Expected<vec_int> input){
        (void)input; 
        return BT::NodeStatus::SUCCESS;
    };


    BehaviorTreeFactory factory;
    // provide the ROS node and the name of the action service
    RosNodeParams params; 
    auto node = std::make_shared<rclcpp::Node>("fibonacci_action_client");
    params.nh = node;
    params.default_port_value = "fibonacci";
    factory.registerNodeType<FibonacciAction>("Fibonacci", params);
    factory.registerNodeType<GetBBvalueNode<vec_int>>("GetVec", get_vec, on_err);

    
    auto server_node = std::make_shared<FibonacciActionServer>();
    auto spin_callback = [](rclcpp::Node::SharedPtr node){rclcpp::spin(node);};
    std::thread{spin_callback, server_node}.detach();
    sleep();

    // tick the tree
    auto tree = factory.createTreeFromText(xml_text);
    tree.tickWhileRunning();

    rclcpp::shutdown();

    return 0;
}

The code is essentially copied-pasted from the tutorials BT ros2 integration and ROS2 action server.

When I try to run the executable I get the following:

root@f1e379732b0f:~/projects# ros2 run bt_act_test bt_act_test 
[INFO] [1687792324.400457200] [server_node]: Received goal request with order 5
[INFO] [1687792324.401037500] [server_node]: Executing goal
[INFO] [1687792324.401250200] [server_node]: Publish feedback
[INFO] [1687792324.421203700] [fibonacci_action_client]: Goal accepted by server, waiting for result
[INFO] [1687792325.401689100] [server_node]: Publish feedback
[INFO] [1687792325.407532400] [fibonacci_action_client]: Next number in sequence received: 0 1 1 2 
0; 1; 1; 2; 
[INFO] [1687792326.401572200] [server_node]: Publish feedback
[INFO] [1687792326.405681700] [fibonacci_action_client]: Next number in sequence received: 0 1 1 2 3 
0; 1; 1; 2; 3; 
[INFO] [1687792326.406163200] [server_node]: Received request to cancel goal
terminate called after throwing an instance of 'std::runtime_error'
  what():  Asked to publish result for goal that does not exist
[ros2run]: Aborted

Apologies for the long message, but I hope it will make the error reproduction easier.
Thanks in advance for the support

Stewil commented

I am having more or less the same issue. Our structure is suprisingly similar. However i am getting a different error message

[INFO] [1687943587.313014132] [action_test_client_1]: Goal handle: goal is 2
terminate called after throwing an instance of 'rclcpp_action::exceptions::UnknownGoalHandleError'
  what():  Goal handle is not known to this client

i do not get the Received request to cancel goal even though i have the same setup.

I noticed the error does not happen with a regular Sequence instead of a Reactive Sequence, however is not my intended behaviour.

this is my tree:

<?xml version="1.0" encoding="UTF-8"?>
<root BTCPP_format="4">
  <BehaviorTree ID="MainLogic">
    <ReactiveSequence>
      <Fallback>
        <GET_ROV_STATUS/>
        <ACTION_TEST_CLIENT_2 goal="1"/>
      </Fallback>
      <ACTION_TEST_CLIENT_1 goal="2"/>
    </ReactiveSequence>
  </BehaviorTree>

  <!-- Description of Node Models (used by Groot) -->
  <TreeNodesModel>
    <Action ID="ACTION_TEST_CLIENT_1"
            editable="true">
      <input_port name="goal"/>
    </Action>
    <Action ID="ACTION_TEST_CLIENT_2"
            editable="true">
      <input_port name="goal"/>
    </Action>
    <Action ID="GET_ROV_STATUS"
            editable="true"/>
  </TreeNodesModel>

</root>

adding my gdb backtrace in hopes of this helping:

#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737339827200)
    at ./nptl/pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=140737339827200) at ./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=140737339827200, signo=signo@entry=6)
    at ./nptl/pthread_kill.c:89
#3  0x00007ffff74f3476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4  0x00007ffff74d97f3 in __GI_abort () at ./stdlib/abort.c:79
#5  0x00007ffff779dbbe in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#6  0x00007ffff77a924c in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#7  0x00007ffff77a92b7 in std::terminate() () from /lib/x86_64-linux-gnu/libstdc++.so.6
#8  0x00007ffff77a9518 in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6
#9  0x0000555555598600 in rclcpp_action::Client<example_interfaces::action::Fibonacci>::async_cancel_goal(std::shared_ptr<rclcpp_action::ClientGoalHandle<example_interfaces::action::Fibonacci> >, std::function<void (std::shared_ptr<action_msgs::srv::CancelGoal_Response_<std::allocator<void> > >)>) (this=0x555555ba6460, 
    goal_handle=std::shared_ptr<rclcpp_action::ClientGoalHandle<example_interfaces::action::Fibonacci>> (empty) = {...}, cancel_callback=...)
    at /opt/ros/humble/include/rclcpp_action/rclcpp_action/client.hpp:536
#10 0x00005555555951a0 in BT::RosActionNode<example_interfaces::action::Fibonacci>::cancelGoal (this=0x555555b85c20)
    at /---/---/ros2ws/install/behaviortree_ros2/include/behaviortree_ros2/behaviortree_ros2/bt_action_node.hpp:408
#11 0x0000555555591a44 in BT::RosActionNode<example_interfaces::action::Fibonacci>::halt (
    this=0x555555b85c20)
--Type <RET> for more, q to quit, c to continue without paging--
    at /---/---/ros2ws/install/behaviortree_ros2/include/behaviortree_ros2/behaviortree_ros2/bt_action_node.hpp:401
#12 0x00007ffff7c9f2c3 in BT::TreeNode::haltNode() ()
   from /usr/local/lib/libbehaviortree_cpp.so
#13 0x00007ffff7c9d7a8 in BT::ControlNode::haltChild(unsigned long) ()
   from /usr/local/lib/libbehaviortree_cpp.so
#14 0x00007ffff7ce0ecb in BT::ReactiveSequence::tick() ()
   from /usr/local/lib/libbehaviortree_cpp.so
#15 0x00007ffff7ca099d in BT::TreeNode::executeTick() ()
   from /usr/local/lib/libbehaviortree_cpp.so
#16 0x00007ffff7c6cc0e in BT::Tree::tickRoot(BT::Tree::TickOption, std::chrono::duration<long, std::ratio<1l, 1000l> >) () from /usr/local/lib/libbehaviortree_cpp.so
#17 0x0000555555566f21 in main (argc=1, argv=0x7fffffffd888)
    at /---/---/ros2ws/src/shared_playground/src/behavior_tree.cpp:40

i hope we can find out what's causing this. To me it seems like the action stems from the action server goal cancel itself, but i do not understand why it works with a regular Sequence then.

I refactored the code.
I will try your example soon

Hi,
I somehow landed into the same runtime error

I am not really sure why one of our Custom BT Nodes based on the bt_action_node is falling under this behavior. As far as I have seen, there is nothing out of the ordinary with it.

Anyways, I opted to handle this behavior by catching the exception that is thrown by the action_client_->async_get_result and action_client_->async_cancel_goal as done in https://github.com/ErickKramer/BehaviorTree.ROS2/blob/c125b15e5dee9b697bcdf7c1c68213f159b9a9f0/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp#L439

I'd be glad to open PR if you find such a solution good enough.

Hi, I somehow landed into the same runtime error

I am not really sure why one of our Custom BT Nodes based on the bt_action_node is falling under this behavior. As far as I have seen, there is nothing out of the ordinary with it.

Anyways, I opted to handle this behavior by catching the exception that is thrown by the action_client_->async_get_result and action_client_->async_cancel_goal as done in https://github.com/ErickKramer/BehaviorTree.ROS2/blob/c125b15e5dee9b697bcdf7c1c68213f159b9a9f0/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp#L439

I'd be glad to open PR if you find such a solution good enough.

Further insight. This seems to happen when setGoal of the Action Node hasn't finished by the time the halt is triggered.

I'm getting the exact same error as @ErickKramer.

ok, I will investigate

I ran into the same issue as well. It appears to be a race condition between server-side goal acceptance / rejection and client side cancellation.

I "solved" it by adding the following to the top of RosActionNode<T>::cancelGoal():

template<class T> inline
  void RosActionNode<T>::cancelGoal()
{
  // block until we get the goal handle (need it to cancel)
  if( !goal_received_ )
  {
    // process callbacks until future returns (could be indefinite...)
    callback_group_executor_.spin_until_future_complete(future_goal_handle_);

    // update variables
    goal_received_ = true;
    goal_handle_ = future_goal_handle_.get();
    future_goal_handle_ = {};

    // if no goal handle the goal was rejected; easy exit
    if( !goal_handle_ )
    {
      return;
    }
  }
  // ... rest of function unchanged

Unfortunately this won't work in the general case since it introduces a potentially indefinite blocking call into cancelGoal... not sure what the right solution looks like.

I faced similar issues in case of inherited RosActionNode. [in commit : bdb778a]

In my case, most issues occurs due to not catching up GoalHandler in RosActionNode.
It appears that there is a need for a term to be invoked or scheduled in an executor.

In RosActionNode, implemented like this :

      auto nodelay = std::chrono::milliseconds(0);
      auto timeout = rclcpp::Duration::from_seconds( double(server_timeout_.count()) / 1000);
      auto ret = callback_group_executor_.spin_until_future_complete(future_goal_handle_, nodelay);

That executor tries to find out goal_handler asynchronously, but spin_until_future_complete seems to check out only one execution at once, while other executions are also present in it. So it appears to need a term to be invoked.

In my case, replacing it with callback_group_executor_.spin_until_future_complete(future_goal_handle_, std::chrono::milliseconds(30)) works well.