escripts/inspector/inspector_qbitt.py
Eugene Amos 07b323010e Inspector
> Updated the `check_container_health(container_name)` function. It now checks whether the container is running and catches any other exceptions. In those cases the function returns a message other than "Healthy", which triggers the error count.

> Added comments to various functions
2023-12-07 23:19:22 -08:00
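
For reference, the states the updated function distinguishes can be reproduced by hand with the same `docker` SDK calls the script below relies on. A minimal sketch, assuming the container is named `qbittorrent` and a local Docker daemon is reachable:

```python
import docker

client = docker.from_env()
try:
    container = client.containers.get('qbittorrent')
    # Anything other than 'running' maps to the script's 'NOT RUNNING'
    print(container.status)
    # None here means the image defines no HEALTHCHECK -> 'NO HEALTH CHECK'
    print(container.attrs['State'].get('Health'))
except docker.errors.NotFound:
    # A missing container maps to 'CONTAINER NOT FOUND'
    print('container not found')
```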


#!/usr/bin/env python3
import time
import os
import datetime
import logging
import logging.handlers
import colorlog
import docker
import requests
# Configuration
container_name = 'qbittorrent'
interval_seconds = 60
logs_folder = 'logs/inspector_qbitt'
cooldown_seconds = 300
max_error_count = 5
# Create logs folder if it doesn't exist
if not os.path.exists(logs_folder):
    os.makedirs(logs_folder)
def get_log_file_path():
    return os.path.join(logs_folder, 'inspector_qbitt.log')
# Rotate the log files at 12am daily
def rotate_log_files(log_file):
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y-%m-%d')
    rotated_log_file = f'inspector_qbitt_{timestamp}.log'
    rotated_log_path = os.path.join(logs_folder, rotated_log_file)
    # Rename the current log file to the rotated log file
    os.rename(log_file, rotated_log_path)
# Formatting the remaining time
def format_remaining_time(remaining_time):
    minutes, seconds = divmod(remaining_time.seconds, 60)
    return f'{minutes} minutes {seconds} seconds'
# Checking the health of the container
def check_container_health(container_name):
    client = docker.from_env()
    try:
        container = client.containers.get(container_name)
        # Check if container is running
        if container.status != 'running':
            return 'NOT RUNNING'
        health_data = container.attrs['State'].get('Health')
        if health_data is None:
            return 'NO HEALTH CHECK'
        health_status = health_data.get('Status', 'UNKNOWN')
        return health_status.upper()
    except docker.errors.NotFound:
        return 'CONTAINER NOT FOUND'
    except Exception as e:
        return f'ERROR: {str(e)}'
# Restarting the container
def restart_container(container_name):
    client = docker.from_env()
    container = client.containers.get(container_name)
    container.restart()
if __name__ == '__main__':
    last_log_date = None  # Variable to keep track of the last log file creation date

    # Set the logging level to INFO
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Create a colorlog formatter
    formatter = colorlog.ColoredFormatter(
        '%(log_color)s%(asctime)s - %(message)s',
        datefmt='%Y-%m-%d %I:%M:%S %p',
        log_colors={
            'DEBUG': 'cyan',
            'INFO': 'green',
            'WARNING': 'yellow',
            'ERROR': 'red',
            'CRITICAL': 'white,bg_red',
        }
    )

    # Create a stream handler for console output
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # Create a file handler for the current log file
    log_file = get_log_file_path()
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    # Create a timed rotating file handler for log rotation
    rotating_file_handler = logging.handlers.TimedRotatingFileHandler(
        log_file,
        when='midnight',
        interval=1,
        backupCount=365,
        atTime=datetime.time(0, 0)  # Rotate at midnight
    )
    rotating_file_handler.setFormatter(formatter)
    logger.addHandler(rotating_file_handler)

    consecutive_error_count = 0
    reboot_triggered = False
    last_reboot_time = None
    cooldown_start_time = None
    while True:
        health_status = check_container_health(container_name)

        # Check if a new log file needs to be created after rotation
        current_log_date = datetime.datetime.now().strftime('%Y-%m-%d')
        if current_log_date != last_log_date:
            # Perform log rotation if it's a new day
            last_log_date = current_log_date
            rotate_log_files(log_file)
            # Update the file handler with the new log file path
            file_handler = logging.FileHandler(log_file)
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(formatter)
            # Remove any existing file handlers and add the new file handler
            for old_handler in logger.handlers[:]:
                if isinstance(old_handler, logging.FileHandler):
                    logger.removeHandler(old_handler)
            logger.addHandler(file_handler)
        # Check if container Health Status is HEALTHY or not
        if health_status != 'HEALTHY':
            logger.warning(f'[WARN] - qbittorrent Health Status: {health_status}')
        else:
            logger.info(f'[INFO] - qbittorrent Health Status: {health_status}')

        # Check if container is not healthy and cooldown is not active
        if health_status != 'HEALTHY' and not cooldown_start_time:
            logger.warning('[WARN] - Container is not HEALTHY. Triggering error count.')
            consecutive_error_count += 1

        # Check if cooldown is not active and the container is not healthy during the current loop iteration
        if not cooldown_start_time and (health_status != 'HEALTHY'):
            logger.warning(f'[WARN] - Error Count: {consecutive_error_count}')

        # Check if consecutive error count reached the maximum
        if consecutive_error_count >= max_error_count:
            logger.critical(f'[CRIT] - Rebooting Container: {container_name} due to consecutive error count')
            restart_container(container_name)
            logger.warning('[WARN] - Container Restarted.')
            reboot_triggered = True
            consecutive_error_count = 0
        # Check if cooldown is active
        if cooldown_start_time:
            remaining_time = cooldown_start_time + datetime.timedelta(seconds=cooldown_seconds) - datetime.datetime.now()
            remaining_time = max(remaining_time, datetime.timedelta())
            logger.warning(f'[WARN] - Cooldown Countdown: {format_remaining_time(remaining_time)} remaining.')
            logger.warning(f'[WARN] - Error Count: {consecutive_error_count}')

            # Check if the cooldown period has passed
            if datetime.datetime.now() > cooldown_start_time + datetime.timedelta(seconds=cooldown_seconds):
                logger.error('[EROR] - Cooldown Period Ended')
                logger.error('[EROR] - ---------------------')
                cooldown_start_time = None
                reboot_triggered = False
                consecutive_error_count = 0  # Reset error count
        # Check if a reboot was triggered and cooldown is not active
        if reboot_triggered and not cooldown_start_time:
            logger.error('[EROR] - Cooldown Started.')
            # Set cooldown_start_time
            cooldown_start_time = datetime.datetime.now()
            # Format the cooldown time and output it
            remaining_time = cooldown_start_time + datetime.timedelta(seconds=cooldown_seconds) - datetime.datetime.now()
            remaining_time = max(remaining_time, datetime.timedelta())
            logger.warning(f'[WARN] - Cooldown Countdown: {format_remaining_time(remaining_time)} remaining.')
            consecutive_error_count = 0  # Reset error count
        # Check if status is HEALTHY
        if health_status == 'HEALTHY':
            # Reset error count
            consecutive_error_count = 0

        logger.info('[INFO] - --------------------------------')
        time.sleep(interval_seconds)