717 lines
18 KiB
Perl
717 lines
18 KiB
Perl
package Archive::Tar::File;
|
|
use strict;
|
|
|
|
use Carp ();
|
|
use IO::File;
|
|
use File::Spec::Unix ();
|
|
use File::Spec ();
|
|
use File::Basename ();
|
|
|
|
use Archive::Tar::Constant;
|
|
|
|
use vars qw[@ISA $VERSION];
|
|
#@ISA = qw[Archive::Tar];
|
|
$VERSION = '2.38';
|
|
|
|
### set value to 1 to oct() it during the unpack ###
|
|
|
|
my $tmpl = [
|
|
name => 0, # string A100
|
|
mode => 1, # octal A8
|
|
uid => 1, # octal A8
|
|
gid => 1, # octal A8
|
|
size => 0, # octal # cdrake - not *always* octal.. A12
|
|
mtime => 1, # octal A12
|
|
chksum => 1, # octal A8
|
|
type => 0, # character A1
|
|
linkname => 0, # string A100
|
|
magic => 0, # string A6
|
|
version => 0, # 2 bytes A2
|
|
uname => 0, # string A32
|
|
gname => 0, # string A32
|
|
devmajor => 1, # octal A8
|
|
devminor => 1, # octal A8
|
|
prefix => 0, # A155 x 12
|
|
|
|
### end UNPACK items ###
|
|
raw => 0, # the raw data chunk
|
|
data => 0, # the data associated with the file --
|
|
# This might be very memory intensive
|
|
];
|
|
|
|
### install get/set accessors for this object.
|
|
for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
|
|
my $key = $tmpl->[$i];
|
|
no strict 'refs';
|
|
*{__PACKAGE__."::$key"} = sub {
|
|
my $self = shift;
|
|
$self->{$key} = $_[0] if @_;
|
|
|
|
### just in case the key is not there or undef or something ###
|
|
{ local $^W = 0;
|
|
return $self->{$key};
|
|
}
|
|
}
|
|
}
|
|
|
|
=head1 NAME
|
|
|
|
Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
my @items = $tar->get_files;
|
|
|
|
print $_->name, ' ', $_->size, "\n" for @items;
|
|
|
|
print $object->get_content;
|
|
$object->replace_content('new content');
|
|
|
|
$object->rename( 'new/full/path/to/file.c' );
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Archive::Tar::Files provides a neat little object layer for in-memory
|
|
extracted files. It's mostly used internally in Archive::Tar to tidy
|
|
up the code, but there's no reason users shouldn't use this API as
|
|
well.
|
|
|
|
=head2 Accessors
|
|
|
|
A lot of the methods in this package are accessors to the various
|
|
fields in the tar header:
|
|
|
|
=over 4
|
|
|
|
=item name
|
|
|
|
The file's name
|
|
|
|
=item mode
|
|
|
|
The file's mode
|
|
|
|
=item uid
|
|
|
|
The user id owning the file
|
|
|
|
=item gid
|
|
|
|
The group id owning the file
|
|
|
|
=item size
|
|
|
|
File size in bytes
|
|
|
|
=item mtime
|
|
|
|
Modification time. Adjusted to mac-time on MacOS if required
|
|
|
|
=item chksum
|
|
|
|
Checksum field for the tar header
|
|
|
|
=item type
|
|
|
|
File type -- numeric, but comparable to exported constants -- see
|
|
Archive::Tar's documentation
|
|
|
|
=item linkname
|
|
|
|
If the file is a symlink, the file it's pointing to
|
|
|
|
=item magic
|
|
|
|
Tar magic string -- not useful for most users
|
|
|
|
=item version
|
|
|
|
Tar version string -- not useful for most users
|
|
|
|
=item uname
|
|
|
|
The user name that owns the file
|
|
|
|
=item gname
|
|
|
|
The group name that owns the file
|
|
|
|
=item devmajor
|
|
|
|
Device major number in case of a special file
|
|
|
|
=item devminor
|
|
|
|
Device minor number in case of a special file
|
|
|
|
=item prefix
|
|
|
|
Any directory to prefix to the extraction path, if any
|
|
|
|
=item raw
|
|
|
|
Raw tar header -- not useful for most users
|
|
|
|
=back
|
|
|
|
=head1 Methods
|
|
|
|
=head2 Archive::Tar::File->new( file => $path )
|
|
|
|
Returns a new Archive::Tar::File object from an existing file.
|
|
|
|
Returns undef on failure.
|
|
|
|
=head2 Archive::Tar::File->new( data => $path, $data, $opt )
|
|
|
|
Returns a new Archive::Tar::File object from data.
|
|
|
|
C<$path> defines the file name (which need not exist), C<$data> the
|
|
file contents, and C<$opt> is a reference to a hash of attributes
|
|
which may be used to override the default attributes (fields in the
|
|
tar header), which are described above in the Accessors section.
|
|
|
|
Returns undef on failure.
|
|
|
|
=head2 Archive::Tar::File->new( chunk => $chunk )
|
|
|
|
Returns a new Archive::Tar::File object from a raw 512-byte tar
|
|
archive chunk.
|
|
|
|
Returns undef on failure.
|
|
|
|
=cut
|
|
|
|
sub new {
|
|
my $class = shift;
|
|
my $what = shift;
|
|
|
|
my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
|
|
($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
|
|
($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
|
|
undef;
|
|
|
|
return $obj;
|
|
}
|
|
|
|
### copies the data, creates a clone ###
|
|
sub clone {
|
|
my $self = shift;
|
|
return bless { %$self }, ref $self;
|
|
}
|
|
|
|
sub _new_from_chunk {
|
|
my $class = shift;
|
|
my $chunk = shift or return; # 512 bytes of tar header
|
|
my %hash = @_;
|
|
|
|
### filter any arguments on defined-ness of values.
|
|
### this allows overriding from what the tar-header is saying
|
|
### about this tar-entry. Particularly useful for @LongLink files
|
|
my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
|
|
|
|
### makes it start at 0 actually... :) ###
|
|
my $i = -1;
|
|
my %entry = map {
|
|
my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake
|
|
($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake
|
|
$s=> $v ? oct $_ : $_ # cdrake
|
|
# $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb
|
|
} unpack( UNPACK, $chunk ); # cdrake
|
|
# } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake
|
|
|
|
|
|
if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake
|
|
my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
|
|
} else { # cdrake
|
|
($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake
|
|
} # cdrake
|
|
|
|
|
|
my $obj = bless { %entry, %args }, $class;
|
|
|
|
### magic is a filetype string.. it should have something like 'ustar' or
|
|
### something similar... if the chunk is garbage, skip it
|
|
return unless $obj->magic !~ /\W/;
|
|
|
|
### store the original chunk ###
|
|
$obj->raw( $chunk );
|
|
|
|
$obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
|
|
$obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
|
|
|
|
|
|
return $obj;
|
|
|
|
}
|
|
|
|
sub _new_from_file {
|
|
my $class = shift;
|
|
my $path = shift;
|
|
|
|
### path has to at least exist
|
|
return unless defined $path;
|
|
|
|
my $type = __PACKAGE__->_filetype($path);
|
|
my $data = '';
|
|
|
|
READ: {
|
|
unless ($type == DIR ) {
|
|
my $fh = IO::File->new;
|
|
|
|
unless( $fh->open($path) ) {
|
|
### dangling symlinks are fine, stop reading but continue
|
|
### creating the object
|
|
last READ if $type == SYMLINK;
|
|
|
|
### otherwise, return from this function --
|
|
### anything that's *not* a symlink should be
|
|
### resolvable
|
|
return;
|
|
}
|
|
|
|
### binmode needed to read files properly on win32 ###
|
|
binmode $fh;
|
|
$data = do { local $/; <$fh> };
|
|
close $fh;
|
|
}
|
|
}
|
|
|
|
my @items = qw[mode uid gid size mtime];
|
|
my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
|
|
|
|
if (ON_VMS) {
|
|
### VMS has two UID modes, traditional and POSIX. Normally POSIX is
|
|
### not used. We currently do not have an easy way to see if we are in
|
|
### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
|
|
### The VMS UIC has the upper 16 bits is the GID, which in many cases
|
|
### the VMS UIC will be larger than 209715, the largest that TAR can
|
|
### handle. So for now, assume it is traditional if the UID is larger
|
|
### than 0x10000.
|
|
|
|
if ($hash{uid} > 0x10000) {
|
|
$hash{uid} = $hash{uid} & 0xFFFF;
|
|
}
|
|
|
|
### The file length from stat() is the physical length of the file
|
|
### However the amount of data read in may be more for some file types.
|
|
### Fixed length files are read past the logical EOF to end of the block
|
|
### containing. Other file types get expanded on read because record
|
|
### delimiters are added.
|
|
|
|
my $data_len = length $data;
|
|
$hash{size} = $data_len if $hash{size} < $data_len;
|
|
|
|
}
|
|
### you *must* set size == 0 on symlinks, or the next entry will be
|
|
### though of as the contents of the symlink, which is wrong.
|
|
### this fixes bug #7937
|
|
$hash{size} = 0 if ($type == DIR or $type == SYMLINK);
|
|
$hash{mtime} -= TIME_OFFSET;
|
|
|
|
### strip the high bits off the mode, which we don't need to store
|
|
$hash{mode} = STRIP_MODE->( $hash{mode} );
|
|
|
|
|
|
### probably requires some file path munging here ... ###
|
|
### name and prefix are set later
|
|
my $obj = {
|
|
%hash,
|
|
name => '',
|
|
chksum => CHECK_SUM,
|
|
type => $type,
|
|
linkname => ($type == SYMLINK and CAN_READLINK)
|
|
? readlink $path
|
|
: '',
|
|
magic => MAGIC,
|
|
version => TAR_VERSION,
|
|
uname => UNAME->( $hash{uid} ),
|
|
gname => GNAME->( $hash{gid} ),
|
|
devmajor => 0, # not handled
|
|
devminor => 0, # not handled
|
|
prefix => '',
|
|
data => $data,
|
|
};
|
|
|
|
bless $obj, $class;
|
|
|
|
### fix up the prefix and file from the path
|
|
my($prefix,$file) = $obj->_prefix_and_file( $path );
|
|
$obj->prefix( $prefix );
|
|
$obj->name( $file );
|
|
|
|
return $obj;
|
|
}
|
|
|
|
sub _new_from_data {
|
|
my $class = shift;
|
|
my $path = shift; return unless defined $path;
|
|
my $data = shift; return unless defined $data;
|
|
my $opt = shift;
|
|
|
|
my $obj = {
|
|
data => $data,
|
|
name => '',
|
|
mode => MODE,
|
|
uid => UID,
|
|
gid => GID,
|
|
size => length $data,
|
|
mtime => time - TIME_OFFSET,
|
|
chksum => CHECK_SUM,
|
|
type => FILE,
|
|
linkname => '',
|
|
magic => MAGIC,
|
|
version => TAR_VERSION,
|
|
uname => UNAME->( UID ),
|
|
gname => GNAME->( GID ),
|
|
devminor => 0,
|
|
devmajor => 0,
|
|
prefix => '',
|
|
};
|
|
|
|
### overwrite with user options, if provided ###
|
|
if( $opt and ref $opt eq 'HASH' ) {
|
|
for my $key ( keys %$opt ) {
|
|
|
|
### don't write bogus options ###
|
|
next unless exists $obj->{$key};
|
|
$obj->{$key} = $opt->{$key};
|
|
}
|
|
}
|
|
|
|
bless $obj, $class;
|
|
|
|
### fix up the prefix and file from the path
|
|
my($prefix,$file) = $obj->_prefix_and_file( $path );
|
|
$obj->prefix( $prefix );
|
|
$obj->name( $file );
|
|
|
|
return $obj;
|
|
}
|
|
|
|
sub _prefix_and_file {
|
|
my $self = shift;
|
|
my $path = shift;
|
|
|
|
my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
|
|
my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
|
|
|
|
### if it's a directory, then $file might be empty
|
|
$file = pop @dirs if $self->is_dir and not length $file;
|
|
|
|
### splitting ../ gives you the relative path in native syntax
|
|
### Remove the root (000000) directory
|
|
### The volume from splitpath will also be in native syntax
|
|
if (ON_VMS) {
|
|
map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs;
|
|
if (length($vol)) {
|
|
$vol = VMS::Filespec::unixify($vol);
|
|
unshift @dirs, $vol;
|
|
}
|
|
}
|
|
|
|
my $prefix = File::Spec::Unix->catdir(@dirs);
|
|
return( $prefix, $file );
|
|
}
|
|
|
|
sub _filetype {
|
|
my $self = shift;
|
|
my $file = shift;
|
|
|
|
return unless defined $file;
|
|
|
|
return SYMLINK if (-l $file); # Symlink
|
|
|
|
return FILE if (-f _); # Plain file
|
|
|
|
return DIR if (-d _); # Directory
|
|
|
|
return FIFO if (-p _); # Named pipe
|
|
|
|
return SOCKET if (-S _); # Socket
|
|
|
|
return BLOCKDEV if (-b _); # Block special
|
|
|
|
return CHARDEV if (-c _); # Character special
|
|
|
|
### shouldn't happen, this is when making archives, not reading ###
|
|
return LONGLINK if ( $file eq LONGLINK_NAME );
|
|
|
|
return UNKNOWN; # Something else (like what?)
|
|
|
|
}
|
|
|
|
### this method 'downgrades' a file to plain file -- this is used for
|
|
### symlinks when FOLLOW_SYMLINKS is true.
|
|
sub _downgrade_to_plainfile {
|
|
my $entry = shift;
|
|
$entry->type( FILE );
|
|
$entry->mode( MODE );
|
|
$entry->linkname('');
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 $bool = $file->extract( [ $alternative_name ] )
|
|
|
|
Extract this object, optionally to an alternative name.
|
|
|
|
See C<< Archive::Tar->extract_file >> for details.
|
|
|
|
Returns true on success and false on failure.
|
|
|
|
=cut
|
|
|
|
sub extract {
|
|
my $self = shift;
|
|
|
|
local $Carp::CarpLevel += 1;
|
|
|
|
### avoid circular use, so only require;
|
|
require Archive::Tar;
|
|
return Archive::Tar->_extract_file( $self, @_ );
|
|
}
|
|
|
|
=head2 $path = $file->full_path
|
|
|
|
Returns the full path from the tar header; this is basically a
|
|
concatenation of the C<prefix> and C<name> fields.
|
|
|
|
=cut
|
|
|
|
sub full_path {
|
|
my $self = shift;
|
|
|
|
### if prefix field is empty
|
|
return $self->name unless defined $self->prefix and length $self->prefix;
|
|
|
|
### or otherwise, catfile'd
|
|
return File::Spec::Unix->catfile( $self->prefix, $self->name );
|
|
}
|
|
|
|
|
|
=head2 $bool = $file->validate
|
|
|
|
Done by Archive::Tar internally when reading the tar file:
|
|
validate the header against the checksum to ensure integer tar file.
|
|
|
|
Returns true on success, false on failure
|
|
|
|
=cut
|
|
|
|
sub validate {
|
|
my $self = shift;
|
|
|
|
my $raw = $self->raw;
|
|
|
|
### don't know why this one is different from the one we /write/ ###
|
|
substr ($raw, 148, 8) = " ";
|
|
|
|
### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
|
|
### like GNU tar does. See here for details:
|
|
### http://www.gnu.org/software/tar/manual/tar.html#SEC139
|
|
### so we do both a signed AND unsigned validate. if one succeeds, that's
|
|
### good enough
|
|
return ( (unpack ("%16C*", $raw) == $self->chksum)
|
|
or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
|
|
}
|
|
|
|
=head2 $bool = $file->has_content
|
|
|
|
Returns a boolean to indicate whether the current object has content.
|
|
Some special files like directories and so on never will have any
|
|
content. This method is mainly to make sure you don't get warnings
|
|
for using uninitialized values when looking at an object's content.
|
|
|
|
=cut
|
|
|
|
sub has_content {
|
|
my $self = shift;
|
|
return defined $self->data() && length $self->data() ? 1 : 0;
|
|
}
|
|
|
|
=head2 $content = $file->get_content
|
|
|
|
Returns the current content for the in-memory file
|
|
|
|
=cut
|
|
|
|
sub get_content {
|
|
my $self = shift;
|
|
$self->data( );
|
|
}
|
|
|
|
=head2 $cref = $file->get_content_by_ref
|
|
|
|
Returns the current content for the in-memory file as a scalar
|
|
reference. Normal users won't need this, but it will save memory if
|
|
you are dealing with very large data files in your tar archive, since
|
|
it will pass the contents by reference, rather than make a copy of it
|
|
first.
|
|
|
|
=cut
|
|
|
|
sub get_content_by_ref {
|
|
my $self = shift;
|
|
|
|
return \$self->{data};
|
|
}
|
|
|
|
=head2 $bool = $file->replace_content( $content )
|
|
|
|
Replace the current content of the file with the new content. This
|
|
only affects the in-memory archive, not the on-disk version until
|
|
you write it.
|
|
|
|
Returns true on success, false on failure.
|
|
|
|
=cut
|
|
|
|
sub replace_content {
|
|
my $self = shift;
|
|
my $data = shift || '';
|
|
|
|
$self->data( $data );
|
|
$self->size( length $data );
|
|
return 1;
|
|
}
|
|
|
|
=head2 $bool = $file->rename( $new_name )
|
|
|
|
Rename the current file to $new_name.
|
|
|
|
Note that you must specify a Unix path for $new_name, since per tar
|
|
standard, all files in the archive must be Unix paths.
|
|
|
|
Returns true on success and false on failure.
|
|
|
|
=cut
|
|
|
|
sub rename {
|
|
my $self = shift;
|
|
my $path = shift;
|
|
|
|
return unless defined $path;
|
|
|
|
my ($prefix,$file) = $self->_prefix_and_file( $path );
|
|
|
|
$self->name( $file );
|
|
$self->prefix( $prefix );
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 $bool = $file->chmod $mode)
|
|
|
|
Change mode of $file to $mode. The mode can be a string or a number
|
|
which is interpreted as octal whether or not a leading 0 is given.
|
|
|
|
Returns true on success and false on failure.
|
|
|
|
=cut
|
|
|
|
sub chmod {
|
|
my $self = shift;
|
|
my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
|
|
$self->{mode} = oct($mode);
|
|
return 1;
|
|
}
|
|
|
|
=head2 $bool = $file->chown( $user [, $group])
|
|
|
|
Change owner of $file to $user. If a $group is given that is changed
|
|
as well. You can also pass a single parameter with a colon separating the
|
|
use and group as in 'root:wheel'.
|
|
|
|
Returns true on success and false on failure.
|
|
|
|
=cut
|
|
|
|
sub chown {
|
|
my $self = shift;
|
|
my $uname = shift;
|
|
return unless defined $uname;
|
|
my $gname;
|
|
if (-1 != index($uname, ':')) {
|
|
($uname, $gname) = split(/:/, $uname);
|
|
} else {
|
|
$gname = shift if @_ > 0;
|
|
}
|
|
|
|
$self->uname( $uname );
|
|
$self->gname( $gname ) if $gname;
|
|
return 1;
|
|
}
|
|
|
|
=head1 Convenience methods
|
|
|
|
To quickly check the type of a C<Archive::Tar::File> object, you can
|
|
use the following methods:
|
|
|
|
=over 4
|
|
|
|
=item $file->is_file
|
|
|
|
Returns true if the file is of type C<file>
|
|
|
|
=item $file->is_dir
|
|
|
|
Returns true if the file is of type C<dir>
|
|
|
|
=item $file->is_hardlink
|
|
|
|
Returns true if the file is of type C<hardlink>
|
|
|
|
=item $file->is_symlink
|
|
|
|
Returns true if the file is of type C<symlink>
|
|
|
|
=item $file->is_chardev
|
|
|
|
Returns true if the file is of type C<chardev>
|
|
|
|
=item $file->is_blockdev
|
|
|
|
Returns true if the file is of type C<blockdev>
|
|
|
|
=item $file->is_fifo
|
|
|
|
Returns true if the file is of type C<fifo>
|
|
|
|
=item $file->is_socket
|
|
|
|
Returns true if the file is of type C<socket>
|
|
|
|
=item $file->is_longlink
|
|
|
|
Returns true if the file is of type C<LongLink>.
|
|
Should not happen after a successful C<read>.
|
|
|
|
=item $file->is_label
|
|
|
|
Returns true if the file is of type C<Label>.
|
|
Should not happen after a successful C<read>.
|
|
|
|
=item $file->is_unknown
|
|
|
|
Returns true if the file type is C<unknown>
|
|
|
|
=back
|
|
|
|
=cut
|
|
|
|
#stupid perl5.5.3 needs to warn if it's not numeric
|
|
sub is_file { local $^W; FILE == $_[0]->type }
|
|
sub is_dir { local $^W; DIR == $_[0]->type }
|
|
sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
|
|
sub is_symlink { local $^W; SYMLINK == $_[0]->type }
|
|
sub is_chardev { local $^W; CHARDEV == $_[0]->type }
|
|
sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
|
|
sub is_fifo { local $^W; FIFO == $_[0]->type }
|
|
sub is_socket { local $^W; SOCKET == $_[0]->type }
|
|
sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
|
|
sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
|
|
sub is_label { local $^W; LABEL eq $_[0]->type }
|
|
|
|
1;
|