summaryrefslogtreecommitdiff
path: root/usr.sbin
diff options
context:
space:
mode:
author: Marc Espie <espie@cvs.openbsd.org> 2022-06-06 07:39:40 +0000
committer: Marc Espie <espie@cvs.openbsd.org> 2022-06-06 07:39:40 +0000
commit: f82f0c14554cdd095ef6767021a126c30c26c59f (patch)
tree: 542bebf465d9186c96e71960c612c6dc931cfc18 /usr.sbin
parent: 5606b668ecde98a160db0c8fc629d013d3be39a2 (diff)
Yet another iteration of the tied algorithm: some packages contain
several hundred copies of the same file (even thousands! I'm looking at you, sdcc), so matching through a list is O(n^2). So hash the hashes on the actual file name, so that we match directly if the file with the same name didn't change, and use the first entry otherwise, since the name won't match. This speeds up updating of texlive somewhat, and makes a huge difference for sdcc.
Diffstat (limited to 'usr.sbin')
-rw-r--r-- usr.sbin/pkg_add/OpenBSD/PkgAdd.pm 60
1 files changed, 38 insertions, 22 deletions
diff --git a/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm b/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm
index f92b9534c35..9909ed23caa 100644
--- a/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm
+++ b/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm
@@ -1,7 +1,7 @@
#! /usr/bin/perl
# ex:ts=8 sw=4:
-# $OpenBSD: PkgAdd.pm,v 1.135 2022/05/30 09:30:40 espie Exp $
+# $OpenBSD: PkgAdd.pm,v 1.136 2022/06/06 07:39:39 espie Exp $
#
# Copyright (c) 2003-2014 Marc Espie <espie@openbsd.org>
#
@@ -84,7 +84,7 @@ sub hash_files
my ($self, $state, $sha) = @_;
return if $self->{link} or $self->{symlink} or $self->{nochecksum};
if (defined $self->{d}) {
- push @{$sha->{$self->{d}->key}}, $self;
+ $sha->{$self->{d}->key}{$self->name} = $self;
}
}
@@ -93,33 +93,49 @@ sub tie_files
my ($self, $state, $sha) = @_;
return if $self->{link} or $self->{symlink} or $self->{nochecksum};
# XXX python doesn't like this, overreliance on timestamps
+
return if $self->{name} =~ m/\.py$/ && !defined $self->{ts};
- if (exists $sha->{$self->{d}->key}) {
- my ($tied, $realname);
- for my $c (@{$sha->{$self->{d}->key}}) {
- # don't tie if there's a problem with the file
+
+ my $h = $sha->{$self->{d}->key};
+ return if !defined $h;
+
+ my ($tied, $realname);
+ my $c = $h->{$self->name};
+ # first we try to match with the same name
+ if (defined $c) {
+ $realname = $c->realname($state);
+ # don't tie if the file doesn't exist
+ if (-f $realname &&
+ # or was altered
+ (stat _)[7] == $self->{size}) {
+ $tied = $c;
+ }
+ }
+ # otherwise we grab any other match under similar rules
+ if (!defined $tied) {
+ for my $c ( values %{$h} ) {
$realname = $c->realname($state);
next unless -f $realname;
- # and do a sanity check that this file wasn't altered
next unless (stat _)[7] == $self->{size};
$tied = $c;
- last if $tied->name eq $self->name;
- }
- return if !defined $tied;
- if ($state->defines('checksum')) {
- my $d = $self->compute_digest($realname, $self->{d});
- # XXX we don't have to display anything here
- # because delete will take care of that
- return unless $d->equals($self->{d});
+ last;
}
- # so we found a match that find_extractible will use
- $self->{tieto} = $tied;
- # and we also need to tell size computation we won't be
- # needing extra room for this.
- $tied->{tied} = 1;
- $state->say("Tying #1 to #2", $self->stringize, $realname)
- if $state->verbose >= 3;
}
+ return if !defined $tied;
+
+ if ($state->defines('checksum')) {
+ my $d = $self->compute_digest($realname, $self->{d});
+ # XXX we don't have to display anything here
+ # because delete will take care of that
+ return unless $d->equals($self->{d});
+ }
+ # so we found a match that find_extractible will use
+ $self->{tieto} = $tied;
+ # and we also need to tell size computation we won't be needing
+ # extra diskspace for this.
+ $tied->{tied} = 1;
+ $state->say("Tying #1 to #2", $self->stringize, $realname)
+ if $state->verbose >= 3;
}
package OpenBSD::PkgAdd::State;