Source code for wasp_backup.retention

# -*- coding: utf-8 -*-
# wasp_backup/retention.py
#
# Copyright (C) 2017 the wasp-backup authors and contributors
# <see AUTHORS file>
#
# This file is part of wasp-backup.
#
# wasp-backup is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# wasp-backup is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with wasp-backup.  If not, see <http://www.gnu.org/licenses/>.

# TODO: document the code
# TODO: write tests for the code

# noinspection PyUnresolvedReferences
from wasp_backup.version import __author__, __version__, __credits__, __license__, __copyright__, __email__
# noinspection PyUnresolvedReferences
from wasp_backup.version import __status__

import re
import json
from datetime import datetime, timedelta
from enum import Enum
from pytz import timezone

from wasp_general.command.enhanced import WCommandArgumentDescriptor, WCommandArgumentRelationship
from wasp_general.command.result import WPlainCommandResult
from wasp_general.network.clients.base import WCommonNetworkClientCapability
from wasp_general.network.clients.collection import __default_client_collection__
from wasp_general.uri import WURI
from wasp_general.datetime import local_tz

from wasp_backup.command_common import WBackupCommand, __common_args__
from wasp_backup.core import WBackupMeta
from wasp_backup.notify import notify


class WRetentionBackupCommand(WBackupCommand):
    """ Command that rotates backup archives stored in a single location.

    Archives are selected by a regular expression, their age is determined by one of the "age helpers"
    and every archive that is not protected by a "period-keep" rule (or by the "minimum-archives" limit)
    is removed from the location.
    """

    class AgeHelper(Enum):
        """ Names of the supported ways of determining an archive age
        """
        name_parsing = 'name-parsing'
        archive_meta = 'archive-meta'
        modification_time = 'modification-time'
        creation_time = 'creation-time'

    class PeriodKeepFilter:
        """ Stateful predicate that keeps at most one archive per period.

        The filter walks back in time from a starting moment in steps of one period. It must be fed with
        (archive name, creation date) pairs ordered from the youngest to the oldest one (this is how
        :meth:`.WRetentionBackupCommand._exec` calls it). The first archive that falls into a period is
        accepted, after that the window moves to the previous period.
        """

        # Duration of a single unit for modifiers that have a fixed length
        _FIXED_UNITS = {
            'M': timedelta(minutes=1),
            'H': timedelta(hours=1),
            'd': timedelta(days=1),
            'w': timedelta(weeks=1),
        }

        # Number of calendar months in a single unit for modifiers that have a variable length
        _CALENDAR_UNITS = {
            'm': 1,
            'y': 12,
        }

        def __init__(self, from_dt, tz, period_value, period_modifier, archive_number):
            """ Create a new filter

            :param from_dt: timezone-aware moment the first (youngest) period ends at
            :param tz: timezone that is applied to calendar-based (month, year) period boundaries
            :param period_value: number of units in a single period (int or a string with digits)
            :param period_modifier: unit of a period, one of "M", "H", "d", "w", "m", "y"
            :param archive_number: number of periods to look through (int or a string with digits)

            :raises ValueError: if period_modifier is unknown or numeric values can not be parsed
            """
            self.__from_dt = from_dt
            self.__tz = tz

            units = int(period_value)
            if period_modifier in self._FIXED_UNITS:
                self.__step = units * self._FIXED_UNITS[period_modifier]
                self.__months = None
            elif period_modifier in self._CALENDAR_UNITS:
                self.__step = None
                # period_value must be honoured for months and years too ("3m" is three months)
                self.__months = units * self._CALENDAR_UNITS[period_modifier]
            else:
                raise ValueError('Unknown period_modifier was specified')

            self.__to_dt = self.__previous_boundary()
            self.__archive_number = int(archive_number)

        @staticmethod
        def _days_in_month(year, month):
            """ Return number of days in the given month (stdlib "datetime" arithmetic only)
            """
            if month == 12:
                return 31
            return (datetime(year, month + 1, 1) - timedelta(days=1)).day

        def __previous_boundary(self):
            """ Return the moment that is one period earlier than the current window start

            :return: timezone-aware datetime
            """
            if self.__step is not None:
                return self.__from_dt - self.__step

            origin = self.__from_dt
            year, month_index = divmod(origin.year * 12 + (origin.month - 1) - self.__months, 12)
            month = month_index + 1
            # Clamp the day so that "31 March minus one month" becomes the last day of February
            # instead of raising ValueError
            day = min(origin.day, self._days_in_month(year, month))
            naive_dt = datetime(
                year=year, month=month, day=day, hour=origin.hour, minute=origin.minute,
                second=origin.second, microsecond=origin.microsecond
            )
            return WRetentionBackupCommand._attach_timezone(naive_dt, self.__tz)

        def __advance(self):
            """ Consume the current period and move the window to the previous one
            """
            self.__archive_number -= 1
            self.__from_dt = self.__to_dt
            self.__to_dt = self.__previous_boundary()

        def __call__(self, check_item):
            """ Check whether the given archive must be kept

            :param check_item: tuple of archive name and timezone-aware archive creation date
            :return: True if the archive is the first one seen in its period, False otherwise
            """
            if self.__archive_number <= 0:
                return False

            archive_name, creation_date = check_item
            if creation_date > self.__from_dt:
                # younger than the current window - that period has been served already
                return False

            # Skip periods that do not have any archive. Without this a single gap in a backup
            # history would make the filter reject every older archive. Each skipped period still
            # counts as one of the requested periods.
            while creation_date < self.__to_dt:
                self.__advance()
                if self.__archive_number <= 0:
                    return False

            self.__advance()
            return True

    __command__ = 'retention'

    __description__ = 'rotate archive backups that resides locally or on a remote location'

    __arguments__ = [
        WCommandArgumentDescriptor(
            'backup-location', required=True, multiple_values=False, meta_var='location',
            help_info='Location (network or directory) that has multiple backups to rotate'
        ),
        WCommandArgumentDescriptor(
            'period-keep', required=True, multiple_values=True, meta_var='period@number_of_copies',
            help_info='parameter speicifies set of archives that should be kept from deleting. During '
            ' the specified value only ONE archive will be kept. The value should be input in the '
            'following format: [period value][period modifier]@[number of periods]. Where '
            '[period modifier] is one of "M", "H", "d", "w", "m", "y" that represent "minute", "hour", '
            '"day", "week", "month", "year" respectively. [number of periods] is number of periods. '
            'For example "2d@10" means that there will be 10 archives one archive per 2 days that will '
            'be kept. May be specified multiple times. In that case it works as a union of a single '
            '"period-keep" result',
            # raw string: "\d" in a plain literal is an invalid escape sequence
            casting_helper=WCommandArgumentDescriptor.RegExpArgumentHelper(r'(\d+)([MHdwmy])@(\d+)')
        ),
        WCommandArgumentDescriptor(
            'timezone', required=True, multiple_values=False, meta_var='timezone_name',
            help_info='timezone that will be apply to age-helper. If archive-meta-age-helper is specified '
            'then "UTC" should be used. For other helpers "local" is a good choice'
        ),
        WCommandArgumentDescriptor(
            'minimum-archives', required=True, multiple_values=False, meta_var='archives_count',
            help_info='Number of archives that will be kept even if they are expired (the youngest '
            'archives will be selected).',
            casting_helper=WCommandArgumentDescriptor.IntegerArgumentCastingHelper(
                validate_fn=lambda x: x > 0
            )
        ),
        WCommandArgumentDescriptor(
            'archive-selection', required=False, multiple_values=False, meta_var='regexp',
            help_info='regular expression that is used to select archives from the backup location',
            default_value='^.*$'
        ),
        WCommandArgumentDescriptor(
            'name-parser-age-helper', flag_mode=True, help_info='defines method that will be used for '
            'archive to determine its age. This one will define age by parsing an archive name (dumb but '
            'fast)'
        ),
        WCommandArgumentDescriptor(
            'archive-meta-age-helper', flag_mode=True, help_info='defines method that will be used for '
            'archive to determine its age. This one will define age from meta data from an archive (smart '
            'but slow, because archive may be downloaded and because meta data needs to be extracted)'
        ),
        WCommandArgumentDescriptor(
            'modification-time-age-helper', flag_mode=True, help_info='defines method that will be used for '
            'archive to determine its age. This one will define age from archive modification time (not '
            'all of the location types supports this type of information, so it may be unavailable)'
        ),
        WCommandArgumentDescriptor(
            'creation-time-age-helper', flag_mode=True, help_info='defines method that will be used for '
            'archive to determine its age. This one will define age from archive creation time (not '
            'all of the location types supports this type of information, so it may be unavailable)'
        ),
        WCommandArgumentDescriptor(
            'date-format', required=False, multiple_values=False, meta_var='format',
            help_info='defines format of a date that will be used to find archive age (format has '
            'the same syntax as strptime python function)'
        ),
        WCommandArgumentDescriptor(
            'download-location', required=False, multiple_values=False, meta_var='directory_path',
            help_info='directory where archives should be downloaded to in order to fecth archive meta '
            'data (used with "archive-meta-age-helper" flag)',
            default_value='/var/tmp'
        ),
        __common_args__['notify-app']
    ]

    __relationships__ = [
        WCommandArgumentRelationship(
            WCommandArgumentRelationship.Relationship.one_of,
            'name-parser-age-helper', 'archive-meta-age-helper', 'modification-time-age-helper',
            'creation-time-age-helper'
        ),
        WCommandArgumentRelationship(
            WCommandArgumentRelationship.Relationship.requirement,
            'name-parser-age-helper', 'date-format'
        ),
    ]

    def _exec(self, command_arguments, **command_env):
        """ Rotate archives in the requested location

        :param command_arguments: parsed command arguments (see :attr:`.__arguments__`)
        :param command_env: command environment (is not used by this command)
        :return: WPlainCommandResult with a number of deleted and kept archives
        """
        location = command_arguments['backup-location']
        uri = WURI.parse(location)
        network_client = __default_client_collection__.open(uri)

        archives = network_client.request(WCommonNetworkClientCapability.list_dir)
        archive_selection_re = re.compile(command_arguments['archive-selection'])
        re_selected_archives = tuple(x for x in archives if archive_selection_re.match(x) is not None)

        if command_arguments['timezone'] != 'local':
            tz = timezone(command_arguments['timezone'])
        else:
            tz = local_tz()

        now = datetime.now(tz=tz)
        age_helper = self.__age_helper(command_arguments, network_client, tz)

        # archives that are dated in the future are neither rotated nor removed
        archive_ages = [(x, age_helper(x)) for x in re_selected_archives]
        archive_ages = [x for x in archive_ages if (now - x[1]).total_seconds() > 0]
        # the youngest archive comes first - PeriodKeepFilter relies on this order
        archive_ages.sort(key=lambda x: (now - x[1]).total_seconds())
        sorted_archives = [x[0] for x in archive_ages]

        archive_to_keep = set()
        for period_keep in command_arguments['period-keep']:
            archive_to_keep.update(filter(
                WRetentionBackupCommand.PeriodKeepFilter(now, tz, *period_keep), archive_ages
            ))

        # keep the result ordered (youngest first) so that a notification is deterministic
        kept_names = {x[0] for x in archive_to_keep}
        keep_archives = [x for x in sorted_archives if x in kept_names]

        extra_archives_required = command_arguments['minimum-archives'] - len(keep_archives)
        if extra_archives_required > 0:
            for archive_name in sorted_archives:
                if archive_name in keep_archives:
                    continue
                keep_archives.append(archive_name)
                extra_archives_required -= 1
                if extra_archives_required <= 0:
                    break

        files_to_remove = set(sorted_archives).difference(keep_archives)
        for file_name in files_to_remove:
            network_client.request(WCommonNetworkClientCapability.remove_file, file_name)

        if 'notify-app' in command_arguments:
            notify(
                {
                    WBackupMeta.RetentionNotificationOptions.retention_location: location,
                    WBackupMeta.RetentionNotificationOptions.kept_archives: list(keep_archives),
                    WBackupMeta.RetentionNotificationOptions.removed_archives: list(files_to_remove)
                },
                command_arguments['notify-app'],
                encode_strict_cls=WBackupMeta.RetentionNotificationOptions
            )

        return WPlainCommandResult(
            'Archives deleted - %i, archives kept - %i' %
            (len(files_to_remove), len(set(re_selected_archives).difference(files_to_remove)))
        )

    @staticmethod
    def _attach_timezone(naive_dt, tz):
        """ Turn a naive datetime into an aware one

        pytz timezones must be applied with "localize"; passing them via "replace(tzinfo=...)" picks
        the first offset known for a zone (usually the historical LMT offset). Other tzinfo
        implementations do not have "localize" and are applied with "replace".

        :param naive_dt: datetime without timezone information
        :param tz: tzinfo object to apply
        :return: timezone-aware datetime
        """
        localize = getattr(tz, 'localize', None)
        if callable(localize):
            return localize(naive_dt)
        return naive_dt.replace(tzinfo=tz)

    def __age_helper(self, command_arguments, network_client, tz):
        """ Return a callable that maps an archive name to its (timezone-aware) creation date

        :param command_arguments: parsed command arguments; the "*-age-helper" flags select a helper
        :param network_client: client that is opened for the backup location
        :param tz: timezone that is applied to a resolved date
        :return: callable that accepts an archive name and returns datetime

        :raises ValueError: if none of the helper flags is set
        """
        if command_arguments['name-parser-age-helper'] is True:
            return self.__name_parser_helper(tz, command_arguments['date-format'])
        elif command_arguments['archive-meta-age-helper'] is True:
            return self.__archive_meta_helper(tz, network_client, command_arguments['download-location'])
        elif command_arguments['modification-time-age-helper'] is True:
            return self.__modification_time_helper(tz, network_client)
        elif command_arguments['creation-time-age-helper'] is True:
            return self.__creation_time_helper(tz, network_client)

        raise ValueError('Unknown helper name is specified')

    def __name_parser_helper(self, tz, date_format):
        """ Return a helper that parses a date from an archive name

        :param tz: timezone that a parsed date belongs to
        :param date_format: "strptime" format that the whole archive name must match
        :return: callable that accepts an archive name and returns timezone-aware datetime
        """
        def helper(archive):
            parsed_dt = datetime.strptime(archive, date_format)
            return WRetentionBackupCommand._attach_timezone(parsed_dt, tz)
        return helper

    def __archive_meta_helper(self, tz, network_client, download_location):
        """ Placeholder for a helper that reads an archive age from archive meta data

        :raises NotImplementedError: always
        """
        # TODO: implement this
        raise NotImplementedError('Not ready')

    def __modification_time_helper(self, tz, network_client):
        """ Placeholder for a helper that uses an archive modification time

        :raises NotImplementedError: always
        """
        # TODO: implement this
        raise NotImplementedError('Not ready')

    def __creation_time_helper(self, tz, network_client):
        """ Placeholder for a helper that uses an archive creation time

        :raises NotImplementedError: always
        """
        # TODO: implement this
        raise NotImplementedError('Not ready')