#!/usr/bin/perl -w use File::Basename; use Storable; #use File::Slurp; #use constant TERMINATOR => "\n"; use constant TERMINATOR => "\0"; use constant CACHE_FILE => 'CACHE'; use Data::Dumper; $Data::Dumper::Terse = 1; $Data::Dumper::Indent = 1; sub divide(\@\@&@) # USAGE: my (@a, @b); divide(@a, @b, sub { $_%2 }, 1..20); print "odd: @a; even: @b\n"; exit; { my ($hits, $misses, $filter) = (shift, shift, shift); for (@_) { if ($filter->($_)) { push @$hits, $_; } else { push @$misses, $_; } } } sub rx_bucket_divide { my $regexes = shift; my @buckets = map {[$_]} @$regexes; for (@_) { for my $i (0 .. $#buckets) { if ($_ =~ $buckets[$i]->[0]) { push @{$buckets[$i]}, $_; last; } } } return @buckets; } sub write_lines { my $f = shift; write_file($f, map { $_ . TERMINATOR } @_); } sub read_file { my $f = shift; open RF, "<$f" or die $!; my @r = ; close RF; return wantarray ? @r : join '', @r; } sub write_file { my $f = shift; open WF, ">$f" or die $!; print WF $_ for @_; close WF; } sub uniq { @_ or return; my $last = \@_; grep { my $r = $_ ne $last; $last = $_; $r; } @_ } sub subtract_unsorted(\@\@) { my %h = map { $_, 1 } @{$_[1]}; grep { !$h{$_} } @{$_[0]}; } sub subtract(\@\@) { # subtract @$b from @$a and return result as list my ($a, $b) = @_; my ($i, $j) = (0, 0); my @res; while ($i < @$a and $j < @$b) { $i++, next if $a->[$i] eq $b->[$j]; $j++, next if $b->[$j] lt $a->[$i]; push @res, $a->[$i++]; } @res; } my @excludes = sort qw( /root /home /usr/local /tmp /mnt /cdrom /proc /sys /dev /etc/.git); push @excludes, qw( /squashes /overlay ); sub subtract_excludes { my $ex = join '|', @excludes; grep { !m{^($ex)($|/)} } @_; } sub chkerr { return unless $?; my $s = shift || ''; $? == $_<<8 and return for @_; $s = ": $s" if $s; my (undef, undef, $line) = caller; die "$0$s: $! (at line $line).\n" if $? == -1; die "$0$s: returned $? (at line $line).\n"; } sub debsums_init() { if (my @packages_missing_checksums = qx(debsums -l)) { chkerr qw(debsums 1 2 3); chomp @packages_missing_checksums; # TODO: upgrade packages with only upgrades available system qw(apt-get --reinstall -d install), @packages_missing_checksums; chkerr 'apt-get'; system 'debsums --generate=missing,nocheck -p /var/cache/apt/archives'; chkerr qw(debsums 1 2 3); } } sub modified_conf_files() { chomp (my @modified_conf_files = qx(debsums -ce)); chkerr qw(debsums 1 2 3); return @modified_conf_files; } sub modified_other_files() { chomp (my @modified_other_files = qx(debsums -c)); chkerr qw(debsums 1 2 3); return @modified_other_files; } sub get_root_fs_files(\@) { my @root_fs_files; my $excludes = join ' -o ', map { "-wholename $_" } @excludes; @root_fs_files = sort split /\0/, qx{find / -xdev \\( $excludes \\) -a -prune -o -print0}; chkerr 'find'; $root_fs_files[0] = '/.' if $root_fs_files[0] eq '/'; @{$_[0]} = @root_fs_files; } sub parse_alternatives_file($) { my $alt_file = shift; open ALT, "<$alt_file" or die $!; my @lines = ; close ALT or die $!; chomp @lines; shift @lines; # discard the priority my @res; push @res, shift @lines; # primary name while ($_ = shift @lines) { push @res, shift @lines; } return @res; } sub get_dpkg_files(\@\@\%) { my ($dpkg_alternatives, $dpkg_deb_files, $dpkg_packages) = @_; my @dpkg_alternatives = map { parse_alternatives_file $_ } ; my @dpkg_deb_files; my %divert; for (qx(dpkg-divert --list)) { if (m{^local diversion of (.+) to (.+)$}) { $divert{$1} = [$2, ""]; } elsif (m{^diversion of (.+) to (.+) by (\S+)$}) { $divert{$1} = [$2, $3]; } else { die "\$_=$_"; } } chkerr 'dpkg-divert'; for () { my $pkg = basename $_; $pkg =~ s/\.list$//; chomp (my @files = read_file($_)); @files = map { exists($divert{$_}) && $divert{$_}->[1] ne $pkg ? $divert{$_}->[0] : $_ } @files; push @dpkg_deb_files, @files; push @{ $dpkg_packages->{$_} }, $pkg for @files; } chomp @dpkg_deb_files; @{$dpkg_alternatives} = @dpkg_alternatives; @{$dpkg_deb_files} = @dpkg_deb_files; } sub install_selections_from_list($) { my $sel_file = shift; system qq{ dpkg --clear-selections && dpkg --set-selections < $sel_file && apt-get dselect-upgrade }; chkerr 'dpkg'; } sub cache { my $pkg = caller; my %cache; my %reftype = qw(% HASH $ SCALAR @ ARRAY); for (@_) { m/^([\$%@])(.+)/ or warn, next; $cache{$1}{$2} = *{"${pkg}::$2"}{$reftype{$1}}; } store \%cache, CACHE_FILE or die "$!"; } sub uncache { my $pkg = caller; my %cache = eval { %{ retrieve CACHE_FILE } } or return 0; my $retrieved = 0; for (@_) { m/^([\$%@])(.+)/ or warn, next; if (my $ref = $cache{$1}{$2}) { *{"${pkg}::$2"} = $ref; ++$retrieved; } } $retrieved; } our (@root_fs_files, @dpkg_alternatives, @dpkg_deb_files, %dpkg_packages); our (@modified_conf_files, @modified_other_files); my @cachevars = qw( @root_fs_files @dpkg_alternatives @dpkg_deb_files %dpkg_packages @modified_conf_files @modified_other_files ); unless (uncache(@cachevars) == @cachevars) { get_root_fs_files(@root_fs_files); get_dpkg_files(@dpkg_deb_files, @dpkg_alternatives, %dpkg_packages); debsums_init(); @modified_conf_files = modified_conf_files(); @modified_other_files = modified_other_files(); cache(@cachevars); } my @tracked_files = uniq sort @dpkg_deb_files, @dpkg_alternatives; my @untracked_files = subtract(@root_fs_files, @tracked_files); my @missing_files = subtract_excludes subtract @tracked_files, @root_fs_files; write_lines 'configured.txt', @modified_conf_files; write_lines 'modified.txt', @modified_other_files; sub split_and_dump_helper { my $DUMP_PACKAGE_NAMES = shift; my $outfile = shift; my $buckets = shift; for (@_) { # add the files to the appropriate buckets for my $i (0 .. $#$buckets) { if ($_ =~ ${$buckets}[$i]->[1]) { push @{$$buckets[$i]}, $_; last; } } } if ($DUMP_PACKAGE_NAMES) { # add "package:" header lines to output for (@$buckets) { my (%pkg, @out); for (splice @$_, 2) { if (exists $dpkg_packages{$_}) { my $pkgs = join ', ', sort @{$dpkg_packages{$_}}; push @{ $pkg{$pkgs} }, $_; } else { warn "no package for $_\n"; } } # TODO: change sort order so multiple packages occur first for (sort keys %pkg) { push @out, "$_:", @{$pkg{$_}}; } next unless @out; write_file sprintf($outfile, $_->[0]), map { $_ .= TERMINATOR } @out; } } else { for (@$buckets) { next unless @$_ > 2; # empty bucket write_file sprintf($outfile, $_->[0]), map { $_ .= TERMINATOR } splice @$_, 2; } } } sub split_and_dump($\@@) { split_and_dump_helper 1, shift, shift, @_; } sub split_and_dump_nopkg($\@@) { split_and_dump_helper 0, shift, shift, @_; } my @added = ( [alternatives => qr{^(/etc/alternatives/|/var/lib/dpkg/alternatives/)}], [run => qr{^(/var/run/|/etc/resolvconf/run/)}], [boot => qr{^/boot/}], [ssl_certs => qr{^/etc/ssl/certs/}], [python_bytecode => qr{^/usr/lib/(python|pymodules).+\.pyc$}], [pymodules => qr{^/usr/lib/pymodules/}], [python_other => qr{^/usr/lib/python}], [menu_xdg => qr{^/var/lib/menu-xdg/}], # freedesktop.org standard menu entries [defoma => qr{^/var/lib/defoma/}], # debian font manager: compiled fonts & indexes [oddly_placed_pyc => qr{^/usr/(lib|share)/.+\.pyc$}], [usr_src => qr{^/usr/src/}], [omf => qr{^/var/lib/doc-base/omf/}], [emacs_bytecode => qr{^/usr/share/emacs.+\.elc$}], [emacs_lisp => qr{^/usr/share/emacs\d*/site-lisp/}], [jed_bytecode => qr{^/usr/share/jed/(lib|jed-extra)/.+\.(slc|dfa)$}], [mime_info_db => qr{^/usr/share/mime/.+\.xml$}], [apt_cache => qr{^/var/cache/apt/}], [apt_lists => qr{^/var/lib/apt/lists/}], [dpkg => qr{^/var/lib/dpkg/}], [var_log => qr{^/var/log/}], [other => qr{}], ); for (grep { -d $_ } , , ) { (my $pkg = $_) =~ s{.*/}{}; # if (qx(dpkg-query -S $_) eq qq($pkg: $_\n)) if ($dpkg_packages{$_} and @{$dpkg_packages{$_}} == 1 and $dpkg_packages{$_}->[0] eq $pkg) { (my $title = $_) =~ y{/}{_}; splice @added, -1, 0, [$title => qr{^$_/}]; } else { # warn "no package for $_\n"; } } split_and_dump_nopkg "added:%s.txt", @added, @untracked_files; my @removed = ( [english_docs => qr{^/usr/share/(doc|(man|info)/en)/}], [nonenglish_docs => qr{^/usr/share/(man|info)/(?!en/)}], [nonenglish_locales => qr{^/usr/share/locale/(?!en/)}], [other => qr{}], ); split_and_dump "removed:%s.txt", @removed, @missing_files; __END__ Debian system state is: package selections (/var/lib/dpkg/status) Can use dpkg --[get|clear|set]-selections if /var/lib/dpkg/available and /etc/apt/sources.list are copied. (It will only work if the packages really are available through sources in sources.list) modified/removed dpkg-tracked files (debsums -c). Generally these should not exist (to persist across upgrades, they should be diversions, although it is not possible to divert to /dev/null) diversions & stat-overrides untracked files added: either by sysadmin, by daemons etc. (/var/log, /var/lib/dpkg, etc.), or generated by scripts (like .pyc, .elc, /initrd). Files generated by pre/postinst could be special-cased to be regenerated by re-running pre/postinst, to avoid including them. They still need to be included on livecds, but they don't need to be included in a nucleus capable of generating the livecd. But that would require the ability to chroot into the system or such, as well as a lot of tedious error-prone tracking. modified conf files (debsums -ce) should these be separate from files added? what about modified non-conf files? partitions, lvm, md, filesystems, installed bootloader It could be copied to make a system backup livecd; copying logical volume layout especially makes sense. # vim:sw=2