diff --git a/src/modules/logger/CMakeLists.txt b/src/modules/logger/CMakeLists.txt index 217f2f73f3..331d788fd7 100644 --- a/src/modules/logger/CMakeLists.txt +++ b/src/modules/logger/CMakeLists.txt @@ -43,6 +43,7 @@ px4_add_module( log_writer.cpp log_writer_file.cpp log_writer_mavlink.cpp + watchdog.cpp DEPENDS version ) diff --git a/src/modules/logger/log_writer.h b/src/modules/logger/log_writer.h index efc8fb3ba0..1069a3ccb6 100644 --- a/src/modules/logger/log_writer.h +++ b/src/modules/logger/log_writer.h @@ -137,6 +137,13 @@ public: return 0; } + pthread_t thread_id_file() const + { + if (_log_writer_file) { return _log_writer_file->thread_id(); } + + return (pthread_t)0; + } + /** * Indicate to the underlying backend whether future write_message() calls need a reliable diff --git a/src/modules/logger/log_writer_file.h b/src/modules/logger/log_writer_file.h index ef4d86f4c7..58e96136da 100644 --- a/src/modules/logger/log_writer_file.h +++ b/src/modules/logger/log_writer_file.h @@ -113,6 +113,8 @@ public: return _need_reliable_transfer; } + pthread_t thread_id() const { return _thread; } + private: static void *run_helper(void *); diff --git a/src/modules/logger/logger.cpp b/src/modules/logger/logger.cpp index 0310af1da7..cdf775a9af 100644 --- a/src/modules/logger/logger.cpp +++ b/src/modules/logger/logger.cpp @@ -34,6 +34,7 @@ #include #include "logger.h" #include "messages.h" +#include "watchdog.h" #include #include @@ -92,6 +93,8 @@ using namespace px4::logger; struct timer_callback_data_s { px4_sem_t semaphore; + watchdog_data_t watchdog_data; + volatile bool watchdog_triggered = false; }; /* This is used to schedule work for the logger (periodic scan for updated topics) */ @@ -101,6 +104,9 @@ static void timer_callback(void *arg) timer_callback_data_s *data = (timer_callback_data_s *)arg; + if (watchdog_update(data->watchdog_data)) { + data->watchdog_triggered = true; + } /* check the value of the semaphore: if the logger cannot keep up with running it's main loop as fast * as the timer_callback here increases the semaphore count, the counter would increase unbounded, @@ -951,6 +957,17 @@ void Logger::run() } else { + if (_writer.backend() & LogWriter::BackendFile) { + + const pid_t pid_self = getpid(); + // The pthread_t ID is equal to the PID on NuttX + const pid_t pid_writer = _writer.thread_id_file(); + + // sched_note_start is already called from pthread_create and task_create, + // which means we can expect to find the tasks in system_load.tasks, as required in watchdog_initialize + watchdog_initialize(pid_self, pid_writer, timer_callback_data.watchdog_data); + } + hrt_call_every(&timer_call, _log_interval, _log_interval, timer_callback, &timer_callback_data); } @@ -1027,6 +1044,11 @@ void Logger::run() } + if (timer_callback_data.watchdog_triggered) { + timer_callback_data.watchdog_triggered = false; + initialize_load_output(PrintLoadReason::Watchdog); + } + const hrt_abstime loop_time = hrt_absolute_time(); @@ -1624,6 +1646,9 @@ void Logger::initialize_load_output(PrintLoadReason reason) void Logger::write_load_output() { + if (_print_load_reason == PrintLoadReason::Watchdog) { + PX4_ERR("Writing watchdog data"); // this is just that we see it easily in the log + } perf_callback_data_t callback_data = {}; char buffer[140]; callback_data.logger = this; diff --git a/src/modules/logger/watchdog.cpp b/src/modules/logger/watchdog.cpp new file mode 100644 index 0000000000..70bde053a6 --- /dev/null +++ b/src/modules/logger/watchdog.cpp @@ -0,0 +1,168 @@ +/**************************************************************************** + * + * Copyright (c) 2016 PX4 Development Team. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name PX4 nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************/ + +#include "watchdog.h" + +#include + +using namespace time_literals; + +namespace px4 +{ +namespace logger +{ + +bool watchdog_update(watchdog_data_t &watchdog_data) +{ + +#ifdef __PX4_NUTTX + + if (system_load.initialized && watchdog_data.logger_main_task_index >= 0 + && watchdog_data.logger_writer_task_index >= 0) { + const hrt_abstime now = hrt_absolute_time(); + const system_load_taskinfo_s &log_writer_task = system_load.tasks[watchdog_data.logger_writer_task_index]; + + if (log_writer_task.valid) { + // Trigger the watchdog if the log writer task has been ready to run for a + // minimum duration and it has not been scheduled during that time. + // When the writer is waiting for an SD transfer, it is not in ready state, thus a long dropout + // will not trigger it. The longest period in ready state I measured was around 70ms, + // after a param change. + // We only check the log writer because it runs at lower priority than the main thread. + // No need to lock the tcb access, since we are in IRQ context + + // update the timestamp if it has been scheduled recently + if (log_writer_task.curr_start_time > watchdog_data.ready_to_run_timestamp) { + watchdog_data.ready_to_run_timestamp = log_writer_task.curr_start_time; + } + + // update the timestamp if not ready to run or if transitioned into ready to run + uint8_t current_state = log_writer_task.tcb->task_state; + + if (current_state != TSTATE_TASK_READYTORUN + || (watchdog_data.last_state != TSTATE_TASK_READYTORUN && current_state == TSTATE_TASK_READYTORUN)) { + watchdog_data.ready_to_run_timestamp = now; + } + + watchdog_data.last_state = current_state; + +#if 0 // for debugging + // test code that prints the maximum time in ready state + static uint64_t max_time = 0; + + if (now - watchdog_data.ready_to_run_timestamp > max_time) { + max_time = now - watchdog_data.ready_to_run_timestamp; + } + + static int counter = 0; + + if (++counter > 300) { + PX4_ERR("max time in ready: %i ms", (int)max_time / 1000); + counter = 0; + max_time = 0; + } + +#endif + + if (now - watchdog_data.ready_to_run_timestamp > 1_s) { + PX4_ERR("watchdog triggered!"); // this will most likely not be logged due to dropouts + + // boost the priority to make sure the logger continues to write to the log + sched_param param{}; + param.sched_priority = SCHED_PRIORITY_MAX; + int ret; + + if (system_load.tasks[watchdog_data.logger_main_task_index].valid) { + ret = sched_setparam(system_load.tasks[watchdog_data.logger_main_task_index].tcb->pid, ¶m); + + if (ret < 0) { + PX4_ERR("sched_reprioritize failed (%i)", ret); + } + } + + ret = sched_setparam(log_writer_task.tcb->pid, ¶m); + + if (ret < 0) { + PX4_ERR("sched_reprioritize failed (%i)", ret); + } + + // make sure we won't trigger again + watchdog_data.logger_main_task_index = -1; + return true; + } + + } else { + // should never happen + watchdog_data.logger_main_task_index = -1; + } + } + +#endif /* __PX4_NUTTX */ + + return false; + +} + +void watchdog_initialize(const pid_t pid_logger_main, const pid_t pid_logger_writer, watchdog_data_t &watchdog_data) +{ +#ifdef __PX4_NUTTX + + sched_lock(); // need to lock the tcb access + + for (int i = 0; i < CONFIG_MAX_TASKS; i++) { + if (system_load.tasks[i].valid) { + if (system_load.tasks[i].tcb->pid == pid_logger_writer) { + watchdog_data.logger_writer_task_index = i; + } + + if (system_load.tasks[i].tcb->pid == pid_logger_main) { + watchdog_data.logger_main_task_index = i; + } + } + } + + sched_unlock(); + + if (watchdog_data.logger_writer_task_index == -1 || + watchdog_data.logger_main_task_index == -1) { + // If we land here it means the NuttX implementation changed + // and one of our assumptions is not valid anymore + PX4_ERR("watchdog init failed"); + } + +#endif /* __PX4_NUTTX */ +} + + +} // namespace logger +} // namespace px4 diff --git a/src/modules/logger/watchdog.h b/src/modules/logger/watchdog.h new file mode 100644 index 0000000000..96471e6d86 --- /dev/null +++ b/src/modules/logger/watchdog.h @@ -0,0 +1,78 @@ +/**************************************************************************** + * + * Copyright (c) 2018 PX4 Development Team. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name PX4 nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************/ + +#pragma once + +#include + +#ifdef __PX4_NUTTX +#include +#include +#endif /* __PX4_NUTTX */ + +namespace px4 +{ +namespace logger +{ + +struct watchdog_data_t { +#ifdef __PX4_NUTTX + int logger_main_task_index = -1; + int logger_writer_task_index = -1; + hrt_abstime ready_to_run_timestamp = hrt_absolute_time(); + uint8_t last_state = TSTATE_TASK_INVALID; +#endif /* __PX4_NUTTX */ +}; + + +/** + * Initialize the watchdog, fill in watchdog_data. + */ +void watchdog_initialize(const pid_t pid_logger_main, const pid_t pid_logger_writer, watchdog_data_t &watchdog_data); + +/** + * Update the watchdog and trigger it if necessary. It is triggered when the log writer task is in + * ready state for a certain period of time, but did not get scheduled. It means that most likely + * some other higher-prio task runs busy. + * When the watchdog triggers, it boosts the priority of the logger's main & writer tasks to maximum, so + * that they get scheduled again. + * + * Expected to be called from IRQ context. + * + * @param watchdog_data + * @return true if watchdog is triggered, false otherwise + */ +bool watchdog_update(watchdog_data_t &watchdog_data); + +} //namespace logger +} //namespace px4 diff --git a/src/modules/systemlib/cpuload.c b/src/modules/systemlib/cpuload.c index 2a5bc64f03..4e18b65776 100644 --- a/src/modules/systemlib/cpuload.c +++ b/src/modules/systemlib/cpuload.c @@ -167,7 +167,11 @@ void sched_note_resume(FAR struct tcb_s *tcb) for (int i = 0; i < CONFIG_MAX_TASKS; i++) { if (system_load.tasks[i].valid && system_load.tasks[i].tcb->pid == tcb->pid) { + // curr_start_time is accessed from an IRQ handler (in logger), so we need + // to make the update atomic + irqstate_t irq_state = px4_enter_critical_section(); system_load.tasks[i].curr_start_time = new_time; + px4_leave_critical_section(irq_state); break; } }