230 lines
7.7 KiB
Perl
230 lines
7.7 KiB
Perl
|
#------------------------------------------------------------------------------
|
||
|
# File: iWork.pm
|
||
|
#
|
||
|
# Description: Read Apple iWork '09 XML+ZIP files
|
||
|
#
|
||
|
# Revisions: 2009/11/11 - P. Harvey Created
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
package Image::ExifTool::iWork;
|
||
|
|
||
|
use strict;
|
||
|
use vars qw($VERSION);
|
||
|
use Image::ExifTool qw(:DataAccess :Utils);
|
||
|
use Image::ExifTool::XMP;
|
||
|
use Image::ExifTool::ZIP;
|
||
|
|
||
|
$VERSION = '1.05';
|
||
|
|
||
|
# test for recognized iWork document extensions and outer XML elements
|
||
|
my %iWorkType = (
|
||
|
# file extensions
|
||
|
NUMBERS => 'NUMBERS',
|
||
|
PAGES => 'PAGES',
|
||
|
KEY => 'KEY',
|
||
|
KTH => 'KTH',
|
||
|
NMBTEMPLATE => 'NMBTEMPLATE',
|
||
|
# we don't support double extensions --
|
||
|
# "PAGES.TEMPLATE" => 'Apple Pages Template',
|
||
|
# outer XML elements
|
||
|
'ls:document' => 'NUMBERS',
|
||
|
'sl:document' => 'PAGES',
|
||
|
'key:presentation' => 'KEY',
|
||
|
);
|
||
|
|
||
|
# MIME types for iWork files (Apple has not registered these yet, but these
|
||
|
# are my best guess after doing some googling. I'm not 100% sure what "sff"
|
||
|
# indicates, but I think it refers to the new "flattened" package format)
|
||
|
my %mimeType = (
|
||
|
'NUMBERS' => 'application/x-iwork-numbers-sffnumbers',
|
||
|
'PAGES' => 'application/x-iwork-pages-sffpages',
|
||
|
'KEY' => 'application/x-iWork-keynote-sffkey',
|
||
|
'NMBTEMPLATE' => 'application/x-iwork-numbers-sfftemplate',
|
||
|
'PAGES.TEMPLATE'=> 'application/x-iwork-pages-sfftemplate',
|
||
|
'KTH' => 'application/x-iWork-keynote-sffkth',
|
||
|
);
|
||
|
|
||
|
# iWork tags
|
||
|
%Image::ExifTool::iWork::Main = (
|
||
|
GROUPS => { 0 => 'XML', 1 => 'XML', 2 => 'Document' },
|
||
|
PROCESS_PROC => \&Image::ExifTool::XMP::ProcessXMP,
|
||
|
VARS => { NO_ID => 1 },
|
||
|
NOTES => q{
|
||
|
The Apple iWork '09 file format is a ZIP archive containing XML files
|
||
|
similar to the Office Open XML (OOXML) format. Metadata tags in iWork
|
||
|
files are extracted even if they don't appear below.
|
||
|
},
|
||
|
authors => { Name => 'Author', Groups => { 2 => 'Author' } },
|
||
|
comment => { },
|
||
|
copyright => { Groups => { 2 => 'Author' } },
|
||
|
keywords => { },
|
||
|
projects => { List => 1 },
|
||
|
title => { },
|
||
|
);
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Generate a tag ID for this XML tag
|
||
|
# Inputs: 0) tag property name list ref
|
||
|
# Returns: tagID
|
||
|
sub GetTagID($)
|
||
|
{
|
||
|
my $props = shift;
|
||
|
return 0 if $$props[-1] =~ /^\w+:ID$/; # ignore ID tags
|
||
|
return ($$props[0] =~ /^.*?:(.*)/) ? $1 : $$props[0];
|
||
|
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# We found an XMP property name/value
|
||
|
# Inputs: 0) ExifTool object ref, 1) tag table ref
|
||
|
# 2) reference to array of XMP property names (last is current property)
|
||
|
# 3) property value, 4) attribute hash ref (not used here)
|
||
|
# Returns: 1 if valid tag was found
|
||
|
sub FoundTag($$$$;$)
|
||
|
{
|
||
|
my ($et, $tagTablePtr, $props, $val, $attrs) = @_;
|
||
|
return 0 unless @$props;
|
||
|
my $verbose = $et->Options('Verbose');
|
||
|
|
||
|
$et->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1;
|
||
|
|
||
|
# un-escape XML character entities
|
||
|
$val = Image::ExifTool::XMP::UnescapeXML($val);
|
||
|
# convert from UTF8 to ExifTool Charset
|
||
|
$val = $et->Decode($val, 'UTF8');
|
||
|
my $tag = GetTagID($props) or return 0;
|
||
|
|
||
|
# add any unknown tags to table
|
||
|
unless ($$tagTablePtr{$tag}) {
|
||
|
$et->VPrint(0, " [adding $tag]\n") if $verbose;
|
||
|
AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag });
|
||
|
}
|
||
|
# save the tag
|
||
|
$et->HandleTag($tagTablePtr, $tag, $val);
|
||
|
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Extract information from an iWork file
|
||
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference
|
||
|
# Returns: 1
|
||
|
# Notes: Upon entry to this routine, the file type has already been verified
|
||
|
# as ZIP and the dirInfo hash contains a 'ZIP' Archive::Zip object reference
|
||
|
sub Process_iWork($$)
|
||
|
{
|
||
|
my ($et, $dirInfo) = @_;
|
||
|
my $zip = $$dirInfo{ZIP};
|
||
|
my ($type, $index, $indexFile, $status);
|
||
|
|
||
|
# try to determine the file type
|
||
|
local $SIG{'__WARN__'} = \&Image::ExifTool::ZIP::WarnProc;
|
||
|
# trust type given by file extension if available
|
||
|
$type = $iWorkType{$$et{FILE_EXT}} if $$et{FILE_EXT};
|
||
|
unless ($type) {
|
||
|
# read the index file
|
||
|
my @members = $zip->membersMatching('^index\.(xml|apxl)$');
|
||
|
if (@members) {
|
||
|
($index, $status) = $zip->contents($members[0]);
|
||
|
unless ($status) {
|
||
|
$indexFile = $members[0]->fileName();
|
||
|
if ($index =~ /^\s*<\?xml version=[^<]+<(\w+:\w+)/s) {
|
||
|
$type = $iWorkType{$1} if $iWorkType{$1};
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
@members = $zip->membersMatching('(?i)^.*\.(pages|numbers|key)/Index.*');
|
||
|
if (@members) {
|
||
|
my $tmp = $members[0]->fileName();
|
||
|
$type = $iWorkType{uc $1} if $tmp =~ /\.(pages|numbers|key)/i;
|
||
|
}
|
||
|
}
|
||
|
$type or $type = 'ZIP'; # assume ZIP by default
|
||
|
}
|
||
|
$et->SetFileType($type, $mimeType{$type});
|
||
|
|
||
|
my @members = $zip->members();
|
||
|
my $docNum = 0;
|
||
|
my $member;
|
||
|
foreach $member (@members) {
|
||
|
# get filename of this ZIP member
|
||
|
my $file = $member->fileName();
|
||
|
next unless defined $file;
|
||
|
$et->VPrint(0, "File: $file\n");
|
||
|
# set the document number and extract ZIP tags
|
||
|
$$et{DOC_NUM} = ++$docNum;
|
||
|
Image::ExifTool::ZIP::HandleMember($et, $member);
|
||
|
|
||
|
# process only the index XML and JPEG thumbnail/preview files
|
||
|
next unless $file =~ m{^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+/preview.jpg)$}i;
|
||
|
# get the file contents if necessary
|
||
|
# (CAREFUL! $buff MUST be local since we hand off a value ref to PreviewImage)
|
||
|
my ($buff, $buffPt);
|
||
|
if ($indexFile and $indexFile eq $file) {
|
||
|
# use the index file we already loaded
|
||
|
$buffPt = \$index;
|
||
|
} else {
|
||
|
($buff, $status) = $zip->contents($member);
|
||
|
$status and $et->Warn("Error extracting $file"), next;
|
||
|
$buffPt = \$buff;
|
||
|
}
|
||
|
# extract JPEG as PreviewImage (should only be QuickLook/Thumbnail.jpg)
|
||
|
if ($file =~ /\.jpg$/) {
|
||
|
$et->FoundTag('PreviewImage', $buffPt);
|
||
|
next;
|
||
|
}
|
||
|
# process "metadata" section of XML index file
|
||
|
next unless $$buffPt =~ /<(\w+):metadata>/g;
|
||
|
my $ns = $1;
|
||
|
my $p1 = pos $$buffPt;
|
||
|
next unless $$buffPt =~ m{</${ns}:metadata>}g;
|
||
|
# construct XML data from "metadata" section only
|
||
|
$$buffPt = '<?xml version="1.0"?>' . substr($$buffPt, $p1, pos($$buffPt)-$p1);
|
||
|
my %dirInfo = (
|
||
|
DataPt => $buffPt,
|
||
|
DirLen => length $$buffPt,
|
||
|
DataLen => length $$buffPt,
|
||
|
XMPParseOpts => {
|
||
|
FoundProc => \&FoundTag,
|
||
|
},
|
||
|
);
|
||
|
my $tagTablePtr = GetTagTable('Image::ExifTool::iWork::Main');
|
||
|
$et->ProcessDirectory(\%dirInfo, $tagTablePtr);
|
||
|
undef $$buffPt; # (free memory now)
|
||
|
}
|
||
|
delete $$et{DOC_NUM};
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
1; # end
|
||
|
|
||
|
__END__
|
||
|
|
||
|
=head1 NAME
|
||
|
|
||
|
Image::ExifTool::iWork - Read Apple iWork '09 XML+ZIP files
|
||
|
|
||
|
=head1 SYNOPSIS
|
||
|
|
||
|
This module is used by Image::ExifTool
|
||
|
|
||
|
=head1 DESCRIPTION
|
||
|
|
||
|
This module contains definitions required by Image::ExifTool to extract meta
|
||
|
information from Apple iWork '09 XML+ZIP files.
|
||
|
|
||
|
=head1 AUTHOR
|
||
|
|
||
|
Copyright 2003-2018, Phil Harvey (phil at owl.phy.queensu.ca)
|
||
|
|
||
|
This library is free software; you can redistribute it and/or modify it
|
||
|
under the same terms as Perl itself.
|
||
|
|
||
|
=head1 SEE ALSO
|
||
|
|
||
|
L<Image::ExifTool::TagNames/iWork Tags>,
|
||
|
L<Image::ExifTool::TagNames/OOXML Tags>,
|
||
|
L<Image::ExifTool(3pm)|Image::ExifTool>
|
||
|
|
||
|
=cut
|
||
|
|