>In article <[email protected]> [email protected] (Lionel Cons) writes:
>   I want to have a routine that gives the canonical form of a path name,
>   removing '~', '.' and '..'. For instance:
>   [rest deleted]

As [email protected] (Randal L. Schwartz) pointed out, I can't
do this properly without looking at the symbolic links. So here is my
second attempt, dealing with symlinks.

Any comments/bugs ?

Thanks.

---------------8<----------------8<-----------------8<-------------------
sub canonic {
   local($name, $cwd) = @_;

   %link = ();

   #
   # first of all get rid of the tilde
   #
   if ($name =~ /^~([^\/]*)(\/.*)?$/) {
       if ($1 eq '') {
           $path = (getpwuid($<))[7] . $2;
       } else {
           $path = (getpwnam($1))[7] . $2;
       }
   } else {
       $path = $name;
   }

   #
   # init
   #
   @todo = ();
   $parent = $cwd;
   $todo = $path;

   #
   # main loop
   #
   TODO:
   while ($todo) {
       $todo = "$parent/$todo" unless ($todo =~ /^\//);
       @new = split(/\//, $todo);
       shift(@new); # remove first part (before the first /)
       unshift(@todo, @new);
       @done = ();
       while ($_ = shift(@todo)) {
           # detect special names
           next if ($_ eq ''); # discard '/'
           next if ($_ eq '.'); # discard '/.'
           pop(@done), next if ($_ eq '..'); # discard '/..'
           # set variables
           if (@done) { # not at root
               $parent = '/' . join('/', @done);
               $file = "$parent/$_";
           } else {
               $parent = '';
               $file = "/$_";
           }
           # check symbolic link
           if (-l $file) { # symbolic link
               $todo = $link{$file} = readlink($file);
               next TODO;
           }
           push(@done, $_);
       }
       last TODO;
   }

   #
   # print result
   #
   $file = '/' unless (@done);
   print "Used symbolic links:\n";
   for (keys(%link)) {
       print "   $_ -> $link{$_}\n";
   }
   print "$path = $file\n";
}

exit(0);

################################################################################
# give the canonic name of a file, stripping '.', '..', '//', '~'
sub canonic {
   local($name, $cwd) = @_;
   local($path, $before, $after);

   #
   # first of all get rid of the tilde
   #
   if ($name =~ /^~([^\/]*)(\/.*)?$/) {
       if ($1 eq '') {
           $path = (getpwuid($<))[7] . $2;
       } else {
           $path = (getpwnam($1))[7] . $2;
       }
   }

   #
   # find an absolute path name
   #
   if ($name =~ /^\//) {
       $path = $name;
   } else {
       $path = "$cwd/$name";
   }

   #
   # remove single dots
   #
   while ($path =~ s/\/\.(\/.*)?$/$1/) {}

   #
   # remove double dots and double slashes
   #
   while (1) {
       # assuming // == /
       while ($path =~ s/\/\//\//) {}
       # find double dots
       last unless ($path =~ /^(.*)\/\.\.(\/.*)?$/);
       ($before, $after) = ($1, $2);
       if ($before eq '') { # assuming /.. == /
           $path = $after;
       } elsif ($before =~ /^(.*)\/[^\/]+$/) {
           $path = "$1$after";
       } else {
           die "Perl bug! (bad semantic for pattern matching)\n";
       }
   }

   #
   # misc transformations
   #
   if ($path eq '') {
       $path = '/';
   } elsif ($path =~ /.\/$/) {
       chop($path);
   }

   return($path);
}
--

Lionel Cons

+------- CERN - European Laboratory for Particle Physics -------+
| E-mail: [email protected]                                   |
| Earth-mail: CN/SW/WS, CERN, CH-1211 GENEVE 23, Switzerland    |
| Phone: +41 22 767 49 13              Fax: +41 22 767 71 55    |
+---------------------------------------------------------------+