mirror of
https://gitee.com/mirrors_PX4/PX4-Autopilot.git
synced 2026-04-14 10:07:39 +08:00
fix(commander): calibration CPU starvation on linux (#26608)
On Linux targets with high-rate external sensor data (>1000Hz), all sensor calibrations (gyro, accel, mag) can freeze PX4 by starving other threads of CPU. Normal flight is unaffected — only calibration triggers the problem. Two compounding issues in the calibration worker threads: 1. calibrate_cancel_check() creates a new uORB::Subscription on every call, which triggers getDeviceNodeLocked() — an O(n) linear strcmp scan through all uORB nodes. In gyro/mag calibration this was called on every sensor sample, consuming the majority of CPU in strcmp alone. 2. SubscriptionBlocking::updatedBlocking() returns immediately when data is already available (it only blocks when no data is pending). With continuous high-rate sensor data, the calibration loops never yield, spinning at 100% CPU. These problems are addressed with this patch as follows: - Throttle calibrate_cancel_check() to once per 200ms in gyro and mag calibration loops. - Add 1ms px4_usleep() yield before updatedBlocking()/updateBlocking() in all calibration loops (gyro, accel, mag, orientation detection). This caps the effective loop rate at ~1000Hz — still far above what calibration needs (250-750 samples). - Force Commander main loop to sleep during calibration so it does not compete with calibration worker threads for CPU. Tested under Linux (x64, aarch64) both with RT and non-RT scheduling, with sensor data arriving at ~3600Hz. Calibration completes normally and no longer results in a deadlocked process.
This commit is contained in:
parent
7ee02968ac
commit
5cdf5ac482
@ -2032,8 +2032,10 @@ void Commander::run()
|
||||
|
||||
perf_end(_loop_perf);
|
||||
|
||||
// sleep if there are no vehicle_commands or action_requests to process
|
||||
if (!_vehicle_command_sub.updated() && !_action_request_sub.updated()) {
|
||||
// Always sleep during calibration to avoid competing with calibration
|
||||
// worker threads for CPU. Otherwise, sleep only when idle.
|
||||
if (_vehicle_status.calibration_enabled
|
||||
|| (!_vehicle_command_sub.updated() && !_action_request_sub.updated())) {
|
||||
px4_usleep(COMMANDER_MONITORING_INTERVAL);
|
||||
}
|
||||
}
|
||||
|
||||
@ -178,6 +178,10 @@ static calibrate_return read_accelerometer_avg(accel_worker_data_s *worker_data,
|
||||
|
||||
/* use the first sensor to pace the readout, but do per-sensor counts */
|
||||
while (counts[0] < samples_num) {
|
||||
// Yield CPU — updatedBlocking() returns immediately when data is
|
||||
// already available, so with high-rate sensors the loop never blocks.
|
||||
px4_usleep(1000);
|
||||
|
||||
if (accel_sub[0].updatedBlocking(100000)) {
|
||||
for (unsigned accel_index = 0; accel_index < MAX_ACCEL_SENS; accel_index++) {
|
||||
sensor_accel_s arp;
|
||||
|
||||
@ -90,6 +90,10 @@ enum detect_orientation_return detect_orientation(orb_advert_t *mavlink_log_pub,
|
||||
uORB::SubscriptionBlocking<vehicle_acceleration_s> vehicle_acceleration_sub{ORB_ID(vehicle_acceleration)};
|
||||
|
||||
while (true) {
|
||||
// Yield CPU — updateBlocking() returns immediately when data is
|
||||
// already available, so with high-rate sensors the loop never blocks.
|
||||
px4_usleep(1000);
|
||||
|
||||
vehicle_acceleration_s accel;
|
||||
|
||||
if (vehicle_acceleration_sub.updateBlocking(accel, 100000)) {
|
||||
|
||||
@ -59,6 +59,8 @@
|
||||
#include <uORB/SubscriptionBlocking.hpp>
|
||||
#include <uORB/topics/sensor_gyro.h>
|
||||
|
||||
using namespace time_literals;
|
||||
|
||||
static constexpr char sensor_name[] {"gyro"};
|
||||
static constexpr unsigned MAX_GYROS = 4;
|
||||
|
||||
@ -91,12 +93,24 @@ static calibrate_return gyro_calibration_worker(gyro_worker_data_t &worker_data)
|
||||
|
||||
/* use slowest gyro to pace, but count correctly per-gyro for statistics */
|
||||
unsigned slow_count = 0;
|
||||
hrt_abstime last_cancel_check = hrt_absolute_time();
|
||||
|
||||
while (slow_count < CALIBRATION_COUNT) {
|
||||
if (calibrate_cancel_check(worker_data.mavlink_log_pub, calibration_started)) {
|
||||
return calibrate_return_cancelled;
|
||||
// Throttle cancel check — calibrate_cancel_check() creates a new
|
||||
// uORB::Subscription each call, triggering an O(n) topic lookup.
|
||||
// At high sensor rates this dominates CPU. Check at most every 200ms.
|
||||
if (hrt_elapsed_time(&last_cancel_check) > 200_ms) {
|
||||
last_cancel_check = hrt_absolute_time();
|
||||
|
||||
if (calibrate_cancel_check(worker_data.mavlink_log_pub, calibration_started)) {
|
||||
return calibrate_return_cancelled;
|
||||
}
|
||||
}
|
||||
|
||||
// Yield CPU — updatedBlocking() returns immediately when data is
|
||||
// already available, so with high-rate sensors the loop never blocks.
|
||||
px4_usleep(1000);
|
||||
|
||||
if (gyro_sub[0].updatedBlocking(100000)) {
|
||||
unsigned update_count = CALIBRATION_COUNT;
|
||||
|
||||
|
||||
@ -278,14 +278,22 @@ static calibrate_return mag_calibration_worker(detect_orientation_return orienta
|
||||
|
||||
uORB::SubscriptionBlocking<sensor_gyro_s> gyro_sub{ORB_ID(sensor_gyro)};
|
||||
|
||||
hrt_abstime last_cancel_check = hrt_absolute_time();
|
||||
|
||||
while (fabsf(gyro_x_integral) < gyro_int_thresh_rad &&
|
||||
fabsf(gyro_y_integral) < gyro_int_thresh_rad &&
|
||||
fabsf(gyro_z_integral) < gyro_int_thresh_rad) {
|
||||
|
||||
/* abort on request */
|
||||
if (calibrate_cancel_check(worker_data->mavlink_log_pub, calibration_started)) {
|
||||
result = calibrate_return_cancelled;
|
||||
return result;
|
||||
// Throttle cancel check — calibrate_cancel_check() creates a new
|
||||
// uORB::Subscription each call, triggering an O(n) topic lookup.
|
||||
// At high sensor rates this dominates CPU. Check at most every 200ms.
|
||||
if (hrt_elapsed_time(&last_cancel_check) > 200_ms) {
|
||||
last_cancel_check = hrt_absolute_time();
|
||||
|
||||
if (calibrate_cancel_check(worker_data->mavlink_log_pub, calibration_started)) {
|
||||
result = calibrate_return_cancelled;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/* abort with timeout */
|
||||
@ -297,6 +305,10 @@ static calibrate_return mag_calibration_worker(detect_orientation_return orienta
|
||||
}
|
||||
|
||||
/* Wait clocking for new data on all gyro */
|
||||
// Yield CPU — updateBlocking() returns immediately when data is
|
||||
// already available, so with high-rate sensors the loop never blocks.
|
||||
px4_usleep(1000);
|
||||
|
||||
sensor_gyro_s gyro;
|
||||
|
||||
if (gyro_sub.updateBlocking(gyro, 1000_ms)) {
|
||||
@ -326,14 +338,27 @@ static calibrate_return mag_calibration_worker(detect_orientation_return orienta
|
||||
unsigned poll_errcount = 0;
|
||||
unsigned calibration_counter_side = 0;
|
||||
|
||||
last_cancel_check = hrt_absolute_time();
|
||||
|
||||
while (hrt_absolute_time() < calibration_deadline &&
|
||||
calibration_counter_side < worker_data->calibration_points_perside) {
|
||||
|
||||
if (calibrate_cancel_check(worker_data->mavlink_log_pub, calibration_started)) {
|
||||
result = calibrate_return_cancelled;
|
||||
break;
|
||||
// Throttle cancel check — calibrate_cancel_check() creates a new
|
||||
// uORB::Subscription each call, triggering an O(n) topic lookup.
|
||||
// At high sensor rates this dominates CPU. Check at most every 200ms.
|
||||
if (hrt_elapsed_time(&last_cancel_check) > 200_ms) {
|
||||
last_cancel_check = hrt_absolute_time();
|
||||
|
||||
if (calibrate_cancel_check(worker_data->mavlink_log_pub, calibration_started)) {
|
||||
result = calibrate_return_cancelled;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Yield CPU — updatedBlocking() returns immediately when data is
|
||||
// already available, so with high-rate sensors the loop never blocks.
|
||||
px4_usleep(1000);
|
||||
|
||||
if (mag_sub[0].updatedBlocking(1000_ms)) {
|
||||
bool rejected = false;
|
||||
Vector3f new_samples[MAX_MAGS] {};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user