Add check for high RAM usage

We had a case where someone took off with an experimental
system with 100% RAM usage on the embedded system
without noticing. This led to problems during flight.

Since we already have a CPU load check it seems natural
to also check the reported RAM usage.
This commit is contained in:
Matthias Grob 2024-06-26 18:44:14 +02:00
parent 30b854da35
commit e4446adba1
4 changed files with 48 additions and 7 deletions

View File

@ -157,6 +157,7 @@ param set-default CBRK_SUPPLY_CHK 894281
# disable check, no CPU load reported on posix yet
param set-default COM_CPU_MAX -1
param set-default COM_RAM_MAX -1
# Don't require RC calibration and configuration
param set-default COM_RC_IN_MODE 1

View File

@ -43,7 +43,10 @@ CpuResourceChecks::CpuResourceChecks()
void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
{
if (_param_com_cpu_max.get() < FLT_EPSILON) {
const bool cpu_load_check_enabled = _param_com_cpu_max.get() > FLT_EPSILON;
const bool ram_usage_check_enabled = _param_com_ram_max.get() > FLT_EPSILON;
if (!cpu_load_check_enabled && !ram_usage_check_enabled) {
return;
}
@ -54,15 +57,15 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
/* EVENT
* @description
* <profile name="dev">
* If the system does not provide any CPU load information, use the parameter <param>COM_CPU_MAX</param>
* to disable the check.
* If the system does not provide any CPU and RAM load information, use the parameters <param>COM_CPU_MAX</param>
* and <param>COM_RAM_MAX</param> to disable the checks.
* </profile>
*/
reporter.healthFailure(NavModes::All, health_component_t::system, events::ID("check_missing_cpuload"),
events::Log::Error, "No CPU load information");
events::Log::Error, "No CPU and RAM load information");
if (reporter.mavlink_log_pub()) {
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: No CPU load information");
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: No CPU and RAM load information");
}
} else {
@ -71,7 +74,7 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
_high_cpu_load_hysteresis.set_state_and_update(high_cpu_load, hrt_absolute_time());
// fail check if CPU load is above the threshold for 2 seconds
if (_high_cpu_load_hysteresis.get_state()) {
if (cpu_load_check_enabled && _high_cpu_load_hysteresis.get_state()) {
/* EVENT
* @description
* The CPU load can be reduced for example by disabling unused modules (e.g. mavlink instances) or reducing the gyro update
@ -88,5 +91,26 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: CPU load too high: %3.1f%%", (double)cpuload_percent);
}
}
const float ram_usage_percent = cpuload.ram_usage * 100.f;
const bool high_ram_usage = ram_usage_percent > _param_com_ram_max.get();
if (ram_usage_check_enabled && high_ram_usage) {
/* EVENT
* @description
* The RAM usage can be reduced for example by disabling unused modules (e.g. mavlink instances).
*
* <profile name="dev">
* The threshold can be adjusted via <param>COM_RAM_MAX</param> parameter.
* </profile>
*/
reporter.healthFailure<float>(NavModes::All, health_component_t::system, events::ID("check_ram_usage_too_high"),
events::Log::Error, "RAM usage too high: {1:.1}%", ram_usage_percent);
if (reporter.mavlink_log_pub()) {
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: RAM usage too high: %3.1f%%",
(double)ram_usage_percent);
}
}
}
}

View File

@ -54,6 +54,7 @@ private:
systemlib::Hysteresis _high_cpu_load_hysteresis{false};
DEFINE_PARAMETERS_CUSTOM_PARENT(HealthAndArmingCheckBase,
(ParamFloat<px4::params::COM_CPU_MAX>) _param_com_cpu_max
(ParamFloat<px4::params::COM_CPU_MAX>) _param_com_cpu_max,
(ParamFloat<px4::params::COM_RAM_MAX>) _param_com_ram_max
)
};

View File

@ -802,6 +802,21 @@ PARAM_DEFINE_FLOAT(COM_KILL_DISARM, 5.0f);
*/
PARAM_DEFINE_FLOAT(COM_CPU_MAX, 95.0f);
/**
* Maximum allowed RAM usage to pass checks
*
* The check fails if the RAM usage is above this threshold.
*
* A negative value or zero disables the check.
*
* @group Commander
* @unit %
* @min -1
* @max 100
* @increment 1
*/
PARAM_DEFINE_FLOAT(COM_RAM_MAX, 95.0f);
/**
* Required number of redundant power modules
*