Universität Ulm,
Fakultät für Mathematik und Wirtschaftswissenschaften,
SAI
Lösung zu Blatt 4 --- UNIX Datenbanken II (WS 1999/2000)
5. Eine persönliche Filmdatenbank
#!/usr/local/bin/perl -w
use strict;
use Compress::Zlib;
use DB_File;
# Personal movie database.
#
# Usage: script [ACTOR_REGEX]
#
# 1. Scan the compressed IMDb actors list for every actor whose name matches
#    ACTOR_REGEX (default '^Schwarzen') and collect that actor's movies.
# 2. For every collected movie title that has no entry in the persistent
#    genre database yet, look its genres up in the compressed genres list
#    and store them, joined by "\0", under the title.
# 3. Print the whole genre database, one title per line, genres separated by
#    tabs.

# Actor-name pattern: first command-line argument or the default.
my $expr_act = shift || '^Schwarzen';

my $inputfilea =
    "/www/thales/ftp/pub/vorlesungen/ws99/uxdb2/uebungen/imdb/actors.list.gz";
# NOTE: the actresses list is declared here but never read by this script.
my $inputfilef =
    "/www/thales/ftp/pub/vorlesungen/ws99/uxdb2/uebungen/imdb/actresses.list.gz";
my $inputfileg =
    "/www/thales/ftp/pub/vorlesungen/ws99/uxdb2/uebungen/imdb/genres.list.gz";
my $databaseg = "/home/thales/sai/lehre/ws99/uxdb2/4/db_genre";

# Compile the user-supplied pattern exactly once, and fail before any file
# is touched if it is not a valid regular expression.
my $actor_re = eval { qr/$expr_act/ };
die "Unable to use regular expression $expr_act: $@"
    unless defined $actor_re;

# ---------------------------------------------------------------------------
# Pass 1: collect the movies of all matching actors.
# ---------------------------------------------------------------------------
my $gz = gzopen($inputfilea, "rb");
die "Unable to open $inputfilea: $gzerrno\n" unless defined $gz;

my %actor;          # actor name => [ raw movie entries ]
my $actor;          # name of the actor whose filmography is being read
my $in_actor = 0;   # true while reading continuation lines of $actor
my $input;

while ($gz->gzreadline($input) > 0) {   # Read at least one char
    chomp $input;

    if ($in_actor) {
        # Continuation lines start with tabs followed by the movie entry.
        if ($input =~ /^\t+(\S.*)$/) {
            push @{ $actor{$actor} }, $1;
            next;
        }
        # Anything else ends the current filmography.  Fall through so the
        # line is still examined as a possible new actor record.
        $in_actor = 0;
    }

    # First line of a record: "<name><tabs><movie (YYYY) ...>".
    next unless $input =~ /^(['A-Za-z0-9].*?)\t+(.*?\([0-9]{4}\).*)$/;
    my ($name, $movie) = ($1, $2);
    next unless $name =~ $actor_re;

    $actor         = $name;
    $actor{$actor} = [$movie];
    $in_actor      = 1;
}
# gzreadline returns <= 0 both at end of file and on error; only a clean
# end of stream leaves $gzerrno at Z_STREAM_END.
die "Error reading $inputfilea: $gzerrno\n" if $gzerrno != Z_STREAM_END;
$gz->gzclose;

print join(" ", keys %actor), "\n";     # OK, got all actors

# ---------------------------------------------------------------------------
# Pass 2: find the titles whose genre is not in the database yet.
# ---------------------------------------------------------------------------
my %dbg;
tie(%dbg, 'DB_File', $databaseg, O_RDWR|O_CREAT, 0664, $DB_HASH)
    or die "Unable to open database $databaseg: $!\n";

my %movies;         # titles whose genres still have to be looked up
foreach my $name (sort keys %actor) {
    foreach my $movie (@{ $actor{$name} }) {
        # Drop everything from the first '[' or '<' onwards, together with
        # the whitespace in front of it.
        my $title = ($movie =~ /^([^[<]*?)\s*[[<]/) ? $1 : $movie;
        unless (defined $dbg{$title}) {
            $movies{$title} = [];
            # Placeholder; it stays in the database if no genre is found,
            # so the title is not looked up again on later runs.
            $dbg{$title} = "Unknown";
        }
        print "Doing work for $title.\n";
    }
}

# ---------------------------------------------------------------------------
# Pass 3: read the genres list, but only if there is something to look up.
# ---------------------------------------------------------------------------
if (keys %movies) {
    $gz = gzopen($inputfileg, "rb");
    die "Unable to open $inputfileg: $gzerrno\n" unless defined $gz;

    while ($gz->gzreadline($input) > 0) {
        chomp $input;
        # "<title (YYYY) ...><whitespace><Genre>"
        next unless $input =~
            /^(['A-Za-z0-9].*?\([0-9]{4}\).*?)\s*([A-Z][-a-zA-Z]*)$/;
        my ($movie, $genre) = ($1, $2);
        next unless exists $movies{$movie};

        push @{ $movies{$movie} }, $genre;
        # Several genres of one movie are stored NUL-separated.
        if (defined $dbg{$movie} and $dbg{$movie} ne "Unknown") {
            $dbg{$movie} .= "\0" . $genre;
        } else {
            $dbg{$movie} = $genre;
        }
    }
    die "Error reading $inputfileg: $gzerrno\n" if $gzerrno != Z_STREAM_END;
    $gz->gzclose;
}

# ---------------------------------------------------------------------------
# Report: dump the complete database.
# ---------------------------------------------------------------------------
foreach my $movie (sort keys %dbg) {
    (my $output = $dbg{$movie}) =~ s/\0/\t/g;
    print "$movie:\t$output\n";
}
untie %dbg;
Ingo Melzer, 19. November 1999