> Updated the `check_container_health(container_name)` function. It now checks whether the container is running and catches any other exceptions. In those cases the function returns a status other than "Healthy", which triggers the error count.
> Added comments to various functions.
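For reference, a sketch of the statuses the updated function can now return (the calls below are illustrative only; anything other than `HEALTHY` counts toward the error threshold):

```python
# Illustrative only -- possible return values of check_container_health(),
# as implemented in the file below.
check_container_health('qbittorrent')    # 'HEALTHY' / 'UNHEALTHY' / 'STARTING' (Docker health status, upper-cased)
check_container_health('qbittorrent')    # 'NOT RUNNING'      -- container exists but is not running
check_container_health('qbittorrent')    # 'NO HEALTH CHECK'  -- container defines no HEALTHCHECK
check_container_health('missing-name')   # 'CONTAINER NOT FOUND' (hypothetical name)
# Any other exception is reported as 'ERROR: <exception message>'.
```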
#!/usr/bin/env python3

import time
import os
import datetime
import logging
import logging.handlers

import colorlog
import docker
import requests

# Configuration
container_name = 'qbittorrent'
interval_seconds = 60
logs_folder = 'logs/inspector_qbitt'
cooldown_seconds = 300
max_error_count = 5

# Create logs folder if it doesn't exist
if not os.path.exists(logs_folder):
    os.makedirs(logs_folder)


def get_log_file_path():
    return os.path.join(logs_folder, 'inspector_qbitt.log')


# Rotate the log files at 12am daily
def rotate_log_files(log_file):
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y-%m-%d')
    rotated_log_file = f'inspector_qbitt_{timestamp}.log'
    rotated_log_path = os.path.join(logs_folder, rotated_log_file)

    # Rename the current log file to the rotated log file
    os.rename(log_file, rotated_log_path)


# Formatting the remaining cooldown time as minutes and seconds
def format_remaining_time(remaining_time):
    minutes, seconds = divmod(remaining_time.seconds, 60)
    return f'{minutes} minutes {seconds} seconds'


# Checking the health of the container
def check_container_health(container_name):
    client = docker.from_env()
    try:
        container = client.containers.get(container_name)

        # Check if container is running
        if container.status != 'running':
            return 'NOT RUNNING'

        health_data = container.attrs['State'].get('Health')
        if health_data is None:
            return 'NO HEALTH CHECK'

        health_status = health_data.get('Status', 'UNKNOWN')
        return health_status.upper()

    except docker.errors.NotFound:
        return 'CONTAINER NOT FOUND'
    except Exception as e:
        return f'ERROR: {str(e)}'


# Restarting the container
def restart_container(container_name):
    client = docker.from_env()
    container = client.containers.get(container_name)
    container.restart()


if __name__ == '__main__':

    last_log_date = None  # Variable to keep track of the last log file creation date

    # Set the logging level to INFO
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Create a colorlog formatter
    formatter = colorlog.ColoredFormatter(
        '%(log_color)s%(asctime)s - %(message)s',
        datefmt='%Y-%m-%d %I:%M:%S %p',
        log_colors={
            'DEBUG': 'cyan',
            'INFO': 'green',
            'WARNING': 'yellow',
            'ERROR': 'red',
            'CRITICAL': 'white,bg_red',
        }
    )

    # Create a stream handler for console output
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # Create a file handler for the current log file
    log_file = get_log_file_path()
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Create a timed rotating file handler for log rotation
    rotating_file_handler = logging.handlers.TimedRotatingFileHandler(
        log_file,
        when='midnight',
        interval=1,
        backupCount=365,
        atTime=datetime.time(0, 0)  # Rotate at midnight
    )
    rotating_file_handler.setFormatter(formatter)
    logger.addHandler(rotating_file_handler)

    consecutive_error_count = 0
    reboot_triggered = False
    last_reboot_time = None
    cooldown_start_time = None

    while True:

        health_status = check_container_health(container_name)

        # Check if a new log file needs to be created after rotation
        current_log_date = datetime.datetime.now().strftime('%Y-%m-%d')
        if current_log_date != last_log_date:
            # Perform log rotation if it's a new day
            last_log_date = current_log_date
            rotate_log_files(log_file)

            # Update the file handler with the new log file path
            file_handler = logging.FileHandler(log_file)
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(formatter)

            # Remove any existing file handlers and add the new file handler
            for old_handler in logger.handlers[:]:
                if isinstance(old_handler, logging.FileHandler):
                    logger.removeHandler(old_handler)
            logger.addHandler(file_handler)

        #logger.info('[INFO] - --------------------------------')
        #logger.info('[INFO] - --------------------------------')
        #logger.info(f'qbittorrent Health Status: {health_status}')

        # Check if container Health Status is HEALTHY or not
        if health_status != 'HEALTHY':
            logger.warning(f'[WARN] - qbittorrent Health Status: {health_status}')
        else:
            logger.info(f'[INFO] - qbittorrent Health Status: {health_status}')

        # Check if container is not healthy and cooldown is not active
        if health_status != 'HEALTHY' and not cooldown_start_time:
            logger.warning('[WARN] - Container is not HEALTHY. Triggering error count.')
            consecutive_error_count += 1

        # Check if cooldown is not active and the container is not healthy during the current loop iteration
        if not cooldown_start_time and (health_status != 'HEALTHY'):
            logger.warning(f'[WARN] - Error Count: {consecutive_error_count}')

        # Check if consecutive error count reached the maximum
        if consecutive_error_count >= max_error_count:
            logger.critical(f'[CRIT] - Rebooting Container: {container_name} due to consecutive error count')
            restart_container(container_name)
            logger.warning('[WARN] - Container Restarted.')
            reboot_triggered = True
            consecutive_error_count = 0

        # Check if cooldown is active
        if cooldown_start_time:
            remaining_time = cooldown_start_time + datetime.timedelta(seconds=cooldown_seconds) - datetime.datetime.now()
            remaining_time = max(remaining_time, datetime.timedelta())
            logger.warning(f'[WARN] - Cooldown Countdown: {format_remaining_time(remaining_time)} remaining.')
            logger.warning(f'[WARN] - Error Count: {consecutive_error_count}')

            # Check if the cooldown period has passed
            if datetime.datetime.now() > cooldown_start_time + datetime.timedelta(seconds=cooldown_seconds):
                logger.error('[EROR] - Cooldown Period Ended')
                logger.error('[EROR] - ---------------------')
                cooldown_start_time = None
                reboot_triggered = False  # Set reboot_triggered to False
                #logger.warning(f'cooldown_start_time should be set to NONE: {cooldown_start_time}')
                consecutive_error_count = 0  # Reset error count

        # Check if reboot was triggered and cooldown is not active
        if reboot_triggered and not cooldown_start_time:
            #logger.warning(f'cooldown_start_time should be set to a time: ***{cooldown_start_time}***')
            #logger.warning(f'reboot_triggered : ***{reboot_triggered}***')
            logger.error('[EROR] - Cooldown Started.')
            # Set cooldown_start_time
            cooldown_start_time = datetime.datetime.now()

            # Format the cooldown time and output it
            remaining_time = cooldown_start_time + datetime.timedelta(seconds=cooldown_seconds) - datetime.datetime.now()
            remaining_time = max(remaining_time, datetime.timedelta())
            logger.warning(f'[WARN] - Cooldown Countdown: {format_remaining_time(remaining_time)} remaining.')

            # Set error count to 0
            consecutive_error_count = 0  # Reset error count

        # Check if status is HEALTHY
        if health_status == 'HEALTHY':
            # Reset error count
            consecutive_error_count = 0

        logger.info('[INFO] - --------------------------------')

        time.sleep(interval_seconds)