Universität Ulm, Fakultät für Mathematik und Wirtschaftswissenschaften, SAI

Lösung zu Blatt 4 --- UNIX Datenbanken II (WS 1999/2000)

5. Eine persönliche Filmdatenbank

#!/usr/local/bin/perl -w

use strict;
use Compress::Zlib;
use DB_File;

# Regular expression selecting the actors of interest. It is taken from the
# first command-line argument; an absent (or false) argument falls back to
# '^Schwarzen'.
my $expr_act = shift || '^Schwarzen';

# Gzip-compressed input lists.
my $inputfilea =
   "/www/thales/ftp/pub/vorlesungen/ws99/uxdb2/uebungen/imdb/actors.list.gz";
# NOTE(review): the actresses list is declared but not read anywhere in the
# visible script. The file name used to read "ctresses.list.gz", which looks
# like a dropped leading "a"; confirm against the directory contents.
my $inputfilef =
   "/www/thales/ftp/pub/vorlesungen/ws99/uxdb2/uebungen/imdb/actresses.list.gz";
my $inputfileg =
   "/www/thales/ftp/pub/vorlesungen/ws99/uxdb2/uebungen/imdb/genres.list.gz";

# DB_File database that maps a movie title to its genres.
my $databaseg = "/home/thales/sai/lehre/ws99/uxdb2/4/db_genre";
my %dbg; my $regok;

# Open the actors list for reading. Compress::Zlib reports the reason for a
# failed gzopen in $gzerrno, so that is what the message shows.
my $gz = gzopen($inputfilea, "rb");
die "Unable to open $inputfilea: $gzerrno\n" unless defined $gz;

# Compile the user-supplied pattern once, before the scan. A malformed
# pattern is then reported immediately, together with Perl's own diagnostic,
# instead of being re-evaluated inside eval{} for every actor line.
my $actor_re = eval { qr/$expr_act/ };
die "Unable to use regular expression $expr_act: $@" unless defined $actor_re;

# Scan the actors list and collect, for every actor whose name matches
# $actor_re, the movie entries of that actor in %actor (name => [entries]).
#
# Line shapes recognised by the patterns below:
#   - a record starts with a name beginning with ', a letter or a digit,
#     followed by tab(s) and a movie entry containing a "(YYYY)" year;
#   - further entries of the same record are lines starting with tab(s).
my $input; my $in_actor = 0; my $actor = undef; my %actor = (); my $movie;
while ($gz->gzreadline($input) > 0) { # Read at least one char
   chomp $input;
   if ($in_actor) { # Reading more movies of the current actor
      if (($movie) = ($input =~ /^\t+(\S.*)$/)) {
         push(@{$actor{$actor}}, $movie);
         next;
      }
      # Not a continuation line, so the current record is finished. Fall
      # through instead of skipping the line: if it already starts the next
      # record, that record must not be lost.
      $in_actor = 0;
   }
   # Look for the start of a new record; anything else is ignored.
   next unless ($actor, $movie) =
      ($input =~ /^(['A-Za-z0-9].*?)\t+(.*?\([0-9]{4}\).*)$/);
   next unless $actor =~ $actor_re;
   $in_actor = 1;
   $actor{$actor} = [$movie];
}
# gzreadline also returns a non-positive value on a read/decompression error;
# only Z_STREAM_END means the list was read completely.
die "Error reading $inputfilea: $gzerrno\n" if $gzerrno != Z_STREAM_END;
$gz->gzclose;
# Terminate the summary line so that following output starts on its own line.
print join(" ", keys %actor), "\n"; # OK, got all actors
# %movies collects the titles whose genre still has to be looked up
# (title => list of genres found so far).
my %movies = (); my $genre; my $title;

# Attach %dbg to the on-disk genre database, creating it if necessary.
# A failed tie would leave %dbg an ordinary in-memory hash and the script
# would appear to work while persisting nothing, so fail loudly instead.
tie(%dbg, 'DB_File', $databaseg, O_RDWR|O_CREAT, 0664, $DB_HASH)
   or die "Unable to open database $databaseg: $!\n";

# Look for movies with unknown genre
foreach my $name (sort keys %actor) {
   foreach my $entry (@{$actor{$name}}) {
      # The title is everything before the first '[' or '<' of the entry
      # (surrounding whitespace dropped); entries without either character
      # are used unchanged.
      $title = ($entry =~ /^([^[<]*?)\s*[[<]/) ? $1 : $entry;
      unless (defined $dbg{$title}) {
         # Not in the database yet: remember it for the genre lookup and
         # store a placeholder until a genre is found.
         $movies{$title} = [];
         $dbg{$title} = "Unknown";
      }
      print "Doing work for $title.\n";
   }
}
# Resolve the genres of all titles collected in %movies. The genres list is
# only read when there is at least one title to look up.
if (%movies) { # OK, we'll have to read the genres, too
   $gz = gzopen($inputfileg, "rb");
   # Compress::Zlib reports the reason for a failed gzopen in $gzerrno.
   die "Unable to open $inputfileg: $gzerrno\n" unless defined $gz;
   while ($gz->gzreadline($input) > 0) {
      chomp $input;
      # A usable line is a title containing a "(YYYY)" year, followed by a
      # capitalised genre word at the end of the line.
      next unless ($movie, $genre) =
         ($input =~ /^(['A-Za-z0-9].*?\([0-9]{4}\).*?)\s*([A-Z][-a-zA-Z]*)$/);
      next unless defined $movies{$movie};
      push(@{$movies{$movie}}, $genre);
      # Store all genres found so far, separated by NUL bytes. Writing the
      # joined in-memory list replaces the "Unknown" placeholder on the first
      # hit and extends the value on later hits, without reading the
      # database back for every genre line.
      $dbg{$movie} = join("\0", @{$movies{$movie}});
   }
   # gzreadline also returns a non-positive value on a read/decompression
   # error; only Z_STREAM_END means the list was read completely.
   die "Error reading $inputfileg: $gzerrno\n" if $gzerrno != Z_STREAM_END;
   $gz->gzclose;
}
# Dump the complete genre database, one "title:<TAB>genres" line per movie in
# sorted title order. The NUL bytes that separate the stored genres are shown
# as tabs. Finally detach %dbg from the database file.
for my $entry (sort keys %dbg) {
   my $genres = $dbg{$entry};
   $genres =~ tr/\0/\t/;
   print $entry, ":\t", $genres, "\n";
}
untie %dbg;

Universität Fakultät SAI

Ingo Melzer, 19. November 1999