• 0 Posts
  • 16 Comments
Joined 2 years ago
cake
Cake day: December 9th, 2023

help-circle






  • I created a script that I dropped into /etc/cron.hourly which does the following:

    1. Use rsync to mirror my root partition to a btrfs partition on another hard drive (which only updates modified files).
    2. Use btrfs subvolume snapshot to create a snapshot of that mirror (which only uses additional storage for modified files).
    3. Move “old” snapshots into a trash directory so I can delete them later if I want to save space.

    It is as follows:

    #!/usr/bin/env python
    from datetime import datetime, timedelta
    import os
    import pathlib
    import shutil
    import subprocess
    import sys
    
    import portalocker
    
    # Snapshot directories are named after their creation time in this format,
    # which also makes the names sort chronologically.
    DATETIME_FORMAT = '%Y-%m-%d-%H%M'
    
    # Layout of the backup filesystem.
    BACKUP_DIRECTORY = pathlib.Path('/backups/internal')
    MIRROR_DIRECTORY = BACKUP_DIRECTORY.joinpath('mirror')
    SNAPSHOT_DIRECTORY = BACKUP_DIRECTORY.joinpath('snapshots')
    TRASH_DIRECTORY = BACKUP_DIRECTORY.joinpath('trash')
    
    # Paths handed to rsync as --exclude patterns.
    EXCLUDED = [
        # System-level trees that are not mirrored (this includes /backups,
        # which holds the backup destination itself).
        '/backups',
        '/dev',
        '/media',
        '/lost+found',
        '/mnt',
        '/nix',
        '/proc',
        '/run',
        '/sys',
        '/tmp',
        '/var',
    ] + [
        # Per-user directories that are not mirrored.
        '/home/*/.cache',
        '/home/*/.local/share/flatpak',
        '/home/*/.local/share/Trash',
        '/home/*/.steam',
        '/home/*/Downloads',
        '/home/*/Trash',
    ]
    
    # Command-line options passed to every rsync invocation.
    OPTIONS = [
        '-avAXH',             # archive mode, verbose, ACLs, xattrs, hard links
        '--delete',           # drop files from the mirror that left the source
        '--delete-excluded',  # ...and files that are now excluded
        '--numeric-ids',      # keep uid/gid numbers instead of mapping names
        '--relative',
        '--progress',
    ]
    
    def execute(command, *options):
        """Echo a command line, run it, and fail loudly if it exits non-zero.
    
        command is the program to run; options are its arguments (strings or
        path objects), passed to the program without going through a shell.
    
        Raises subprocess.CalledProcessError when the exit status is non-zero.
        """
        # Flush the echo right away: when stdout is not a terminal (cron, pipes)
        # Python buffers it, and the line would otherwise show up after the
        # output of the command it announces.
        print('>', command, *options, flush=True)
        subprocess.run((command,) + options, check=True)
    
    # Make the backup filesystem writable for this run; the `finally` clause
    # below switches it back to read-only afterwards.
    execute(
        '/usr/bin/mount',
        '-o', 'rw,remount',
        BACKUP_DIRECTORY,
    )
    
    # Becomes True when another instance holds the lock. In that case the
    # filesystem must stay writable for that instance, so the read-only
    # remount at the end is skipped.
    another_backup_running = False
    
    try:
        # fail_when_locked=True makes portalocker raise AlreadyLocked as soon as
        # it finds the lock held, instead of retrying until its timeout and then
        # raising a plain LockException that the handler below would not catch.
        with portalocker.Lock(
            os.path.join(BACKUP_DIRECTORY, 'lock'),
            fail_when_locked=True,
        ):
            # 1. Bring the mirror up to date with the root filesystem.
            execute(
                '/usr/bin/rsync',
                '/',
                MIRROR_DIRECTORY,
                *(
                    OPTIONS
                    +
                    [f'--exclude={excluded_path}' for excluded_path in EXCLUDED]
                )
            )
    
            # One timestamp for both the snapshot name and the retention
            # windows, so they cannot disagree around a minute/day boundary.
            now = datetime.now()
    
            # 2. Freeze the mirror into a read-only snapshot named after `now`.
            execute(
                '/usr/bin/btrfs',
                'subvolume',
                'snapshot',
                '-r',
                MIRROR_DIRECTORY,
                SNAPSHOT_DIRECTORY / now.strftime(DATETIME_FORMAT),
            )
    
            # 3. Thin out old snapshots. Collect the timestamps of all existing
            # snapshots, oldest first; entries whose name is not a timestamp
            # are reported and left alone rather than aborting the run.
            snapshot_datetimes = []
            for filename in os.listdir(SNAPSHOT_DIRECTORY):
                try:
                    snapshot_datetimes.append(
                        datetime.strptime(filename, DATETIME_FORMAT)
                    )
                except ValueError:
                    print(
                        f'Ignoring unrecognized entry in {SNAPSHOT_DIRECTORY}: {filename}',
                        file=sys.stderr,
                    )
            snapshot_datetimes.sort()
    
            # shutil.move() only moves *into* the trash if it already exists as
            # a directory; otherwise the first snapshot would be renamed to
            # "trash" itself.
            TRASH_DIRECTORY.mkdir(exist_ok=True)
    
            # Keep the last 24 hours of snapshot_datetimes
            one_day_ago = now - timedelta(days=1)
            while snapshot_datetimes and snapshot_datetimes[-1] >= one_day_ago:
                snapshot_datetimes.pop()
    
            def prune_all_with(get_metric):
                """Keep the newest remaining snapshot of its period, trash the rest.
    
                get_metric maps a snapshot datetime to the period it belongs to
                (a day, a week, a month). The newest entry still in
                snapshot_datetimes is kept on disk; every older entry with the
                same metric value is moved to the trash directory. All handled
                entries are removed from snapshot_datetimes.
    
                The caller must make sure snapshot_datetimes is not empty.
                """
                this = get_metric(snapshot_datetimes[-1])
                snapshot_datetimes.pop()
                while snapshot_datetimes and get_metric(snapshot_datetimes[-1]) == this:
                    snapshot = SNAPSHOT_DIRECTORY / snapshot_datetimes[-1].strftime(DATETIME_FORMAT)
                    snapshot_datetimes.pop()
                    # Snapshots are created read-only (-r above); clear the flag
                    # before moving the snapshot to the trash.
                    execute('/usr/bin/btrfs', 'property', 'set', '-ts', snapshot, 'ro', 'false')
                    shutil.move(snapshot, TRASH_DIRECTORY)
    
            # Keep daily snapshot_datetimes for the last month
            last_daily_to_keep = now.date() - timedelta(days=30)
            while snapshot_datetimes and snapshot_datetimes[-1].date() >= last_daily_to_keep:
                prune_all_with(lambda x: x.date())
    
            # Keep weekly snapshot_datetimes for the last three months.
            # The ISO year is part of the key so that equal week numbers from
            # different years are never mistaken for the same week.
            last_weekly_to_keep = now.date() - timedelta(days=90)
            while snapshot_datetimes and snapshot_datetimes[-1].date() >= last_weekly_to_keep:
                prune_all_with(lambda x: x.isocalendar()[:2])
    
            # Keep monthly snapshot_datetimes forever.
            # The year is part of the key: with gaps in the history, e.g.
            # January 2024 can directly follow January 2023 in the list, and
            # comparing the month number alone would trash the older one.
            while snapshot_datetimes:
                prune_all_with(lambda x: (x.year, x.month))
    except portalocker.AlreadyLocked:
        another_backup_running = True
        sys.exit('Backup already in progress.')
    finally:
        # Runs after the lock file has been closed. Skipped when another
        # instance is still writing to the filesystem.
        if not another_backup_running:
            execute(
                '/usr/bin/mount',
                '-o', 'ro,remount',
                BACKUP_DIRECTORY,
            )