[WIP] logger priority boost command line

This commit is contained in:
Daniel Agar 2023-02-10 19:22:40 -05:00
parent edb6c635d5
commit 9766fcfb6f
No known key found for this signature in database
GPG Key ID: FD3CBA98017A69DE
6 changed files with 130 additions and 69 deletions

View File

@ -141,8 +141,12 @@ typedef struct {
// The navigation system needs to execute regularly but has no realtime needs
#define SCHED_PRIORITY_NAVIGATION (SCHED_PRIORITY_DEFAULT + 5)
// SCHED_PRIORITY_DEFAULT
#define SCHED_PRIORITY_LOG_WRITER (SCHED_PRIORITY_DEFAULT - 10)
#define SCHED_PRIORITY_PARAMS (SCHED_PRIORITY_DEFAULT - 15)
#define SCHED_PRIORITY_LOG_WRITER (SCHED_PRIORITY_DEFAULT - 40)
// SCHED_PRIORITY_IDLE
typedef int (*px4_main_t)(int argc, char *argv[]);

View File

@ -301,7 +301,7 @@ int LogWriterFile::thread_start()
sched_param param;
/* low priority, as this is expensive disk I/O */
param.sched_priority = SCHED_PRIORITY_DEFAULT - 40;
param.sched_priority = SCHED_PRIORITY_LOG_WRITER;
(void)pthread_attr_setschedparam(&thr_attr, &param);
pthread_attr_setstacksize(&thr_attr, PX4_STACK_ADJUSTED(1170));

View File

@ -36,7 +36,6 @@
#include "logged_topics.h"
#include "logger.h"
#include "messages.h"
#include "watchdog.h"
#include <dirent.h>
#include <sys/stat.h>
@ -83,47 +82,6 @@
using namespace px4::logger;
using namespace time_literals;
struct timer_callback_data_s {
px4_sem_t semaphore;
watchdog_data_t watchdog_data;
volatile bool watchdog_triggered = false;
};
/* This is used to schedule work for the logger (periodic scan for updated topics) */
static void timer_callback(void *arg)
{
/* Note: we are in IRQ context here (on NuttX) */
timer_callback_data_s *data = (timer_callback_data_s *)arg;
int semaphore_value = 0;
px4_sem_getvalue(&data->semaphore, &semaphore_value);
/* check the value of the semaphore: if the logger cannot keep up with running it's main loop as fast
* as the timer_callback here increases the semaphore count, the counter would increase unbounded,
* leading to an overflow at some point. This case we want to avoid here, so we check the current
* value against a (somewhat arbitrary) threshold, and avoid calling sem_post() if it's exceeded.
* (it's not a problem if the threshold is a bit too large, it just means the logger will do
* multiple iterations at once, the next time it's scheduled).
* As the watchdog also uses the counter we use a conservatively high value */
bool semaphore_value_saturated = semaphore_value > 100;
if (watchdog_update(data->watchdog_data, semaphore_value_saturated)) {
data->watchdog_triggered = true;
}
if (semaphore_value_saturated) {
return;
}
px4_sem_post(&data->semaphore);
}
int logger_main(int argc, char *argv[])
{
// logger currently assumes little endian
@ -142,6 +100,35 @@ namespace px4
namespace logger
{
void Logger::timer_callback(void *arg)
{
// Note: we are in IRQ context here (on NuttX)
timer_callback_data_s *data = static_cast<timer_callback_data_s *>(arg);
int semaphore_value = 0;
px4_sem_getvalue(&data->semaphore, &semaphore_value);
/* check the value of the semaphore: if the logger cannot keep up with running it's main loop as fast
* as the timer_callback here increases the semaphore count, the counter would increase unbounded,
* leading to an overflow at some point. This case we want to avoid here, so we check the current
* value against a (somewhat arbitrary) threshold, and avoid calling sem_post() if it's exceeded.
* (it's not a problem if the threshold is a bit too large, it just means the logger will do
* multiple iterations at once, the next time it's scheduled).
* As the watchdog also uses the counter we use a conservatively high value */
bool semaphore_value_saturated = semaphore_value > 100;
if (watchdog_update(data->watchdog_data, semaphore_value_saturated)) {
data->watchdog_triggered.store(true);
}
if (semaphore_value_saturated) {
return;
}
px4_sem_post(&data->semaphore);
}
constexpr const char *Logger::LOG_ROOT[(int)LogType::Count];
int Logger::custom_command(int argc, char *argv[])
@ -161,6 +148,16 @@ int Logger::custom_command(int argc, char *argv[])
return 0;
}
if (!strcmp(argv[0], "priority_boost")) {
get_instance()->set_priority_boost_request(true);
return 0;
}
if (!strcmp(argv[0], "priority_restore")) {
get_instance()->set_priority_boost_request(false);
return 0;
}
return print_usage("unknown command");
}
@ -649,11 +646,10 @@ void Logger::run()
}
/* init the update timer */
struct hrt_call timer_call {};
timer_callback_data_s timer_callback_data;
px4_sem_init(&timer_callback_data.semaphore, 0, 0);
_timer_call = {};
px4_sem_init(&_timer_callback_data.semaphore, 0, 0);
/* timer_semaphore use case is a signal */
px4_sem_setprotocol(&timer_callback_data.semaphore, SEM_PRIO_NONE);
px4_sem_setprotocol(&_timer_callback_data.semaphore, SEM_PRIO_NONE);
int polling_topic_sub = -1;
@ -673,10 +669,10 @@ void Logger::run()
// sched_note_start is already called from pthread_create and task_create,
// which means we can expect to find the tasks in system_load.tasks, as required in watchdog_initialize
watchdog_initialize(pid_self, writer_thread, timer_callback_data.watchdog_data);
watchdog_initialize(pid_self, writer_thread, _timer_callback_data.watchdog_data);
}
hrt_call_every(&timer_call, _log_interval, _log_interval, timer_callback, &timer_callback_data);
hrt_call_every(&_timer_call, _log_interval, _log_interval, timer_callback, &_timer_callback_data);
}
// check for new subscription data
@ -703,9 +699,9 @@ void Logger::run()
/* check for logging command from MAVLink (start/stop streaming) */
handle_vehicle_command_update();
if (timer_callback_data.watchdog_triggered) {
timer_callback_data.watchdog_triggered = false;
if (_timer_callback_data.watchdog_triggered.load()) {
initialize_load_output(PrintLoadReason::Watchdog);
_timer_callback_data.watchdog_triggered.store(false);
}
@ -916,7 +912,7 @@ void Logger::run()
* And on linux this is quite accurate as well, but under NuttX it is not accurate,
* because usleep() has only a granularity of CONFIG_MSEC_PER_TICK (=1ms).
*/
while (px4_sem_wait(&timer_callback_data.semaphore) != 0) {}
while (px4_sem_wait(&_timer_callback_data.semaphore) != 0) {}
}
}
@ -925,8 +921,8 @@ void Logger::run()
stop_log_file(LogType::Full);
stop_log_file(LogType::Mission);
hrt_cancel(&timer_call);
px4_sem_destroy(&timer_callback_data.semaphore);
hrt_cancel(&_timer_call);
px4_sem_destroy(&_timer_callback_data.semaphore);
// stop the writer thread
_writer.thread_stop();
@ -1587,7 +1583,7 @@ void Logger::initialize_load_output(PrintLoadReason reason)
void Logger::write_load_output()
{
if (_print_load_reason == PrintLoadReason::Watchdog) {
PX4_ERR("Writing watchdog data"); // this is just that we see it easily in the log
PX4_WARN("Writing watchdog data"); // this is just that we see it easily in the log
}
perf_callback_data_t callback_data = {};
@ -2421,6 +2417,12 @@ $ logger on
PRINT_MODULE_USAGE_PARAM_FLOAT('c', 1.0, 0.2, 2.0, "Log rate factor (higher is faster)", true);
PRINT_MODULE_USAGE_COMMAND_DESCR("on", "start logging now, override arming (logger must be running)");
PRINT_MODULE_USAGE_COMMAND_DESCR("off", "stop logging now, override arming (logger must be running)");
#if defined(__PX4_NUTTX)
PRINT_MODULE_USAGE_COMMAND_DESCR("priority_boost", "boost logger and log writer to maximum priority (logger must be running)");
PRINT_MODULE_USAGE_COMMAND_DESCR("priority_restore", "restore logger and log writer priority to default (logger must be running)");
#endif // __PX4_NUTTX
PRINT_MODULE_USAGE_DEFAULT_COMMANDS();
return 0;

View File

@ -36,12 +36,14 @@
#include "log_writer.h"
#include "logged_topics.h"
#include "messages.h"
#include "watchdog.h"
#include <containers/Array.hpp>
#include "util.h"
#include <px4_platform_common/defines.h>
#include <drivers/drv_hrt.h>
#include <version/version.h>
#include <parameters/param.h>
#include <px4_platform_common/atomic.h>
#include <px4_platform_common/printload.h>
#include <px4_platform_common/module.h>
#include <px4_platform_common/module_params.h>
@ -56,6 +58,8 @@
#include <uORB/topics/vehicle_status.h>
#include <uORB/topics/parameter_update.h>
extern "C" __EXPORT int logger_main(int argc, char *argv[]);
using namespace time_literals;
@ -131,6 +135,8 @@ public:
void set_arm_override(bool override) { _manually_logging_override = override; }
void set_priority_boost_request(bool request_boost) { _timer_callback_data.watchdog_data.priority_boost_requested.store(request_boost); }
private:
enum class PrintLoadReason {
@ -326,6 +332,18 @@ private:
void adjust_subscription_updates();
/* This is used to schedule work for the logger (periodic scan for updated topics) */
static void timer_callback(void *arg);
struct timer_callback_data_s {
px4_sem_t semaphore{};
watchdog_data_t watchdog_data{};
px4::atomic_bool watchdog_triggered{false};
} _timer_callback_data{};
struct hrt_call _timer_call {};
uint8_t *_msg_buffer{nullptr};
int _msg_buffer_len{0};
@ -333,6 +351,7 @@ private:
bool _prev_state{false}; ///< previous state depending on logging mode (arming or aux1 state)
bool _manually_logging_override{false};
bool _request_priority_boost{false};
Statistics _statistics[(int)LogType::Count];
hrt_abstime _last_sync_time{0}; ///< last time a sync msg was sent

View File

@ -34,6 +34,7 @@
#include "watchdog.h"
#include <px4_platform_common/log.h>
#include <px4_platform_common/tasks.h>
#if defined(__PX4_NUTTX) && !defined(CONFIG_SCHED_INSTRUMENTATION)
# error watchdog support requires CONFIG_SCHED_INSTRUMENTATION
@ -54,9 +55,11 @@ bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_satura
if (system_load.initialized && watchdog_data.logger_main_task_index >= 0
&& watchdog_data.logger_writer_task_index >= 0) {
const hrt_abstime now = hrt_absolute_time();
const system_load_taskinfo_s &log_writer_task = system_load.tasks[watchdog_data.logger_writer_task_index];
if (log_writer_task.valid) {
const system_load_taskinfo_s &logger_main_task = system_load.tasks[watchdog_data.logger_main_task_index];
const system_load_taskinfo_s &logger_writer_task = system_load.tasks[watchdog_data.logger_writer_task_index];
if (logger_writer_task.valid) {
// Trigger the watchdog if the log writer task has been ready to run for a
// minimum duration and it has not been scheduled during that time.
// When the writer is waiting for an SD transfer, it is not in ready state, thus a long dropout
@ -68,12 +71,12 @@ bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_satura
// No need to lock the tcb access, since we are in IRQ context
// update the timestamp if it has been scheduled recently
if (log_writer_task.curr_start_time > watchdog_data.ready_to_run_timestamp) {
watchdog_data.ready_to_run_timestamp = log_writer_task.curr_start_time;
if (logger_writer_task.curr_start_time > watchdog_data.ready_to_run_timestamp) {
watchdog_data.ready_to_run_timestamp = logger_writer_task.curr_start_time;
}
// update the timestamp if not ready to run or if transitioned into ready to run
uint8_t current_state = log_writer_task.tcb->task_state;
uint8_t current_state = logger_writer_task.tcb->task_state;
if (current_state != TSTATE_TASK_READYTORUN
|| (watchdog_data.last_state != TSTATE_TASK_READYTORUN && current_state == TSTATE_TASK_READYTORUN)) {
@ -106,23 +109,52 @@ bool watchdog_update(watchdog_data_t &watchdog_data, bool semaphore_value_satura
watchdog_data.sem_counter_saturated_start = now;
}
if (now - watchdog_data.sem_counter_saturated_start > 3_s || now - watchdog_data.ready_to_run_timestamp > 1_s) {
if (!watchdog_data.priority_boosted
&& ((now - watchdog_data.sem_counter_saturated_start > 3_s) || (now - watchdog_data.ready_to_run_timestamp > 1_s)
|| watchdog_data.priority_boost_requested.load())
) {
// boost the priority to make sure the logger continues to write to the log.
// Note that we never restore the priority, to keep the logic simple and because it is
// an event that must not occur under normal circumstances (if it does, there's a bug
// somewhere)
sched_param param{};
param.sched_priority = SCHED_PRIORITY_MAX;
if (system_load.tasks[watchdog_data.logger_main_task_index].valid) {
sched_setparam(system_load.tasks[watchdog_data.logger_main_task_index].tcb->pid, &param);
if (logger_main_task.valid) {
sched_param param{};
param.sched_priority = SCHED_PRIORITY_MAX;
sched_setparam(logger_main_task.tcb->pid, &param);
}
sched_setparam(log_writer_task.tcb->pid, &param);
if (logger_writer_task.valid) {
sched_param param{};
param.sched_priority = SCHED_PRIORITY_MAX;
sched_setparam(logger_writer_task.tcb->pid, &param);
}
// reset to avoid trigger again immediately
watchdog_data.sem_counter_saturated_start = now;
watchdog_data.ready_to_run_timestamp = now;
watchdog_data.priority_boosted = true;
watchdog_data.priority_boost_requested.store(true);
// make sure we won't trigger again
watchdog_data.logger_main_task_index = -1;
return true;
} else if (watchdog_data.priority_boosted && !watchdog_data.priority_boost_requested.load()) {
// restore original priorities
if (logger_main_task.valid) {
sched_param param{};
param.sched_priority = SCHED_PRIORITY_LOG_CAPTURE;
sched_setparam(logger_main_task.tcb->pid, &param);
}
if (logger_writer_task.valid) {
sched_param param{};
param.sched_priority = SCHED_PRIORITY_LOG_WRITER;
sched_setparam(logger_writer_task.tcb->pid, &param);
}
watchdog_data.priority_boosted = false;
watchdog_data.priority_boost_requested.store(false);
}
} else {

View File

@ -34,6 +34,7 @@
#pragma once
#include <drivers/drv_hrt.h>
#include <px4_platform_common/atomic.h>
#ifdef __PX4_NUTTX
#include <nuttx/sched.h>
@ -52,7 +53,10 @@ struct watchdog_data_t {
hrt_abstime ready_to_run_timestamp = hrt_absolute_time();
hrt_abstime sem_counter_saturated_start = hrt_absolute_time();
uint8_t last_state = TSTATE_TASK_INVALID;
bool priority_boosted{false};
#endif /* __PX4_NUTTX */
px4::atomic_bool priority_boost_requested {false};
};