#!/bin/bash

# Description : A jiffy to search CTAN file list
# Author      : Anh K. Huynh <[email protected]>
# Version     : $Id: ctan 82 2009-09-30 01:51:59Z pi $
# License     : LPPL (any version)
# Homepage    : CTAN:/support/ctan.tools
# Document    : self documented, try `$0 about`
# Requirements: bash, lynx, wget, tar, bzip2,
#               sed, sort, awk, (e)grep, cut
# TODO        : fancy get options (code/interface)

# Make helper programs installed under ~/bin reachable as well.
export PATH="${PATH}:${HOME}/bin"

# Remote locations. The three values marked "to be updated" are only
# defaults: ctan_mirror_select overwrites them once a mirror is known.
CTAN_MIRROR="http://mirror.ctan.org/"
CTAN_ARCHIVE="http://ctan.org/tex-archive"             # to be updated
CTAN_CONTRIB="${CTAN_ARCHIVE}/macros/latex/contrib/"   # to be updated
CTAN_FILES="${CTAN_ARCHIVE}/FILES.byname"              # to be updated
VIETTUG_CACHEDIR="http://viettug.org/ctan.tools/"

# Local database directory, the tools verified by ctan_check_all, and the
# flag ctan_check_all sets to 1 when the optional lzma program is found.
DESTDIR="${HOME}/.ctan/"
TOOLS="lynx wget tar bzip2 sed sort awk grep"
HAVE_LZMA=0

# functions

# Print all arguments, joined into one string, behind a ":: " prefix.
# Backslash escapes such as \t are interpreted (echo -e).
# Output goes to stdout.
msg()
{
  local text="$*"
  echo -e ":: ${text}"
}

# Check that a single tool can be found by the shell.
#
# Arguments: $1 - name of the program to look up
# Globals:   ERROR is set to 1 when the tool cannot be found
# Outputs:   a "not found" notice through msg when the tool is missing
# Returns:   0 when the tool is found, 1 otherwise

ctan_check()
{
  # The name is quoted so that an empty or unusual argument is looked up
  # as given; unquoted, an empty name vanished and a bare `type` succeeded.
  if ! type "$1" >/dev/null 2>&1; then
    ERROR=1
    msg "not found '$1'"
    return 1
  fi
}

# Verify every tool listed in $TOOLS, then probe for the optional lzma.
#
# Globals:   TOOLS is read; ERROR is reset and then set by ctan_check;
#            HAVE_LZMA becomes 1 when lzma is found
# Outputs:   progress notices through msg
# Returns:   0 when all required tools exist; otherwise the whole script
#            is terminated with exit status 1

ctan_check_all()
{
  msg "checking dependencies..."

  ERROR=0
  # $TOOLS is a space separated list, so it is split on purpose here.
  for tool in $TOOLS; do
    ctan_check "$tool"
  done

  case "$ERROR" in
    1)
      msg "error occured. exit 1"
      exit 1
      ;;
  esac
  msg "tools found: $TOOLS"

  # lzma is optional: a miss only leaves HAVE_LZMA untouched.
  if ctan_check lzma; then
    HAVE_LZMA=1
  fi

  return 0
}

# Mirror detection: follow HTTP redirects by hand, one hop at a time,
# starting from the given URL, and adopt the last URL as the archive root.
# see also http://viettug.org/blogs/show/310
#
# Arguments: $1 - URL to start from
# Globals:   CTAN_ARCHIVE, CTAN_CONTRIB and CTAN_FILES are rewritten
# Outputs:   progress notices through msg

ctan_mirror_select()
{
  local src="$1"
  local location label rest
  local hops=0
  local max_hops=20

  msg "Hello, this is ctan_mirror_select"
  msg "found $src"

  # wget is told not to follow redirects itself (--max-redirect=0). The
  # redirect target is taken from the "Location:" line of its log, which
  # is captured from stderr; whatever it writes to stdout is discarded.
  while location="$(wget --max-redirect=0 "$src" -O - 2>&1 1>/dev/null \
      | grep Location: 2>/dev/null)"; do
    # The URL is the second whitespace separated word of that line.
    read -r label src rest <<< "$location"
    msg "found $src"

    # Stop after a fixed number of hops so that a redirect cycle on the
    # server side cannot keep this loop running forever.
    hops=$((hops + 1))
    if [ "$hops" -ge "$max_hops" ]; then
      msg "too many redirections ($max_hops); using $src"
      break
    fi
  done

  CTAN_ARCHIVE="$src"
  CTAN_CONTRIB="$CTAN_ARCHIVE/macros/latex/contrib/"
  CTAN_FILES="$CTAN_ARCHIVE/FILES.byname"
  msg "mirror detected: $CTAN_ARCHIVE"
}

# Show how many packages and files the local database holds.
#
# Globals:   DESTDIR is read (packages.txt and files.txt live there)
# Outputs:   two summary lines through msg; the counts are line counts

ctan_info()
{
  # Paths are quoted so that a database directory containing spaces
  # (for instance below an unusual $HOME) is still counted correctly.
  msg "* $(wc -l "$DESTDIR/packages.txt" | awk '{print $1}') package(s) found"
  msg "* $(wc -l "$DESTDIR/files.txt" | awk '{print $1}') file(s) found"
}

# Update the local database kept in $DESTDIR.
#
# Arguments:
#   --first-time         optional, must come first: print a welcome notice
#   (none) | --use-cache download and unpack the prebuilt cache archive
#   anything else        rebuild from the FILES.byname list of a CTAN
#                        mirror, after asking for confirmation
#   --keep-only-cache    given as that selector: additionally delete the
#                        *.txt files once the cache archives are written
# Globals:   DESTDIR, VIETTUG_CACHEDIR, HAVE_LZMA, CTAN_MIRROR are read;
#            CTAN_FILES is read after ctan_mirror_select has been called
# Returns:   0 on success or when the user cancels; 1 when the cache
#            cannot be fetched or unpacked, or $DESTDIR cannot be entered.
#            The script exits with 1 when the file list download fails.

ctan_update()
{
  local status=0
  local reply
  local pkg_filter='|macros/latex/contrib/'

  if [ "x$1" = "x--first-time" ]; then
    shift
    msg "This maybe the first time you use the tool."
    msg "The directory ~/.ctan will be created"
  fi

  mkdir -pv "$DESTDIR"

  # using the cache file published under $VIETTUG_CACHEDIR

  if [[ ( "x$1" == "x" ) || ( "x$1" == "x--use-cache" ) ]]; then
    shift
    msg "update by using cache file..."

    if [ "$HAVE_LZMA" = 1 ]; then
      msg "getting $VIETTUG_CACHEDIR/cache.tlzma..."
      if wget "$VIETTUG_CACHEDIR/cache.tlzma" -O "$DESTDIR/cache.tlzma"; then
        msg "uncompressing $DESTDIR/cache.tlzma..."
        lzma --decompress < "$DESTDIR/cache.tlzma" \
          | tar xvf - -C "$DESTDIR" || status=1
      else
        status=1
      fi
    else
      msg "getting $VIETTUG_CACHEDIR/cache.tbz..."
      if wget "$VIETTUG_CACHEDIR/cache.tbz" -O "$DESTDIR/cache.tbz"; then
        msg "uncompressing $DESTDIR/cache.tbz..."
        tar xfjv "$DESTDIR/cache.tbz" -C "$DESTDIR" || status=1
      else
        status=1
      fi
    fi

    # The downloaded archive is removed whether or not it could be used.
    msg "now remove some big files"
    rm -fv "$DESTDIR"/{dump_files.txt,cache.tbz,cache.tlzma}

    if [ "$status" -ne 0 ]; then
      msg "failed to update from the cache file."
      return 1
    fi

    ctan_info

    return 0
  fi

  # updating by downloading database from CTAN
  # previous version uses a search result returned by CTAN
  # since 1.5.0, Jim Hefferon suggest to use FILES.byname
  # see also (ctan_joiner)

  msg "update will get about 8MB from CTAN and may take very long time."
  read -r -n1 -p":: Are you sure you wanna do this (y/N)? " reply
  echo ""
  if [ ! "x$reply" = "xy" ]; then
    msg "Update was cancelled."
    return 0
  fi

  ctan_mirror_select "$CTAN_MIRROR"

  msg "searching all files. please wait as this is a heavy task"
  if ! lynx -dump "$CTAN_FILES" > "$DESTDIR/tmp0.txt"; then
    msg "failed to download from CTAN. exit 1"
    rm -fv "$DESTDIR/tmp0.txt"
    exit 1
  fi

  msg "getting list of LaTeX packages..."
  # files.txt: wrapped lines joined again, then every space removed.
  ctan_joiner < "$DESTDIR/tmp0.txt" \
    | sed -e 's# ##g' \
    > "$DESTDIR/files.txt"
  # packages.txt: entries below macros/latex/contrib/ that name a zip, sty
  # or dtx file, cut back to their directory and made unique on field 3.
  grep "$pkg_filter" "$DESTDIR/files.txt" \
    | grep -v 'info/' \
    | grep -E '\.(zip|sty|dtx)' \
    | sed -e 's#/[^/]*$##g' \
    | sed -e "s#$pkg_filter#|#g" \
    | sort -t'|' -uk3 \
    > "$DESTDIR/packages.txt"
  msg "removing temporary file..."
  rm -fv "$DESTDIR/tmp0.txt"

  msg "compressing the results..."
  if ! cd "$DESTDIR"; then
    msg "cannot change directory to $DESTDIR"
    return 1
  fi

  # Archives are written under a dot name first and renamed afterwards.
  msg "creating tbz archive..."
  tar -cvj -f .cache.tbz {files,packages}.txt
  mv .cache.tbz cache.tbz

  if [ "$HAVE_LZMA" = 1 ]; then
    msg "creating lzma archive..."
    tar -cv --use-compress-program lzma -f .cache.tlzma {files,packages}.txt
    mv .cache.tlzma cache.tlzma
  fi

  ctan_info

  if [[ "x$1" == "x--keep-only-cache" ]]; then
    msg "removing all temporary files. Only cache is kept"
    rm -fv "$DESTDIR"/*.txt
  fi
}

# Search the local database and print the matching entries.
#
# Arguments: [-file] <grep arguments...>
#            With a leading -file the file list is searched, otherwise the
#            package list. All remaining arguments are handed to grep as
#            they are, so grep options can be used.
# Globals:   DESTDIR is read; when that directory does not exist yet,
#            ctan_update is called to create the database from the cache
# Outputs:   one "f1 | f2<TAB>| f3" line per hit; entries containing
#            "obsolete" are dropped
# Returns:   0 when at least one entry is printed, 1 otherwise

ctan_search()
{
  local db

  [ -d "$DESTDIR/" ] || ctan_update --first-time --use-cache

  if [ "x$1" = "x-file" ]; then
    shift
    db="$DESTDIR/files.txt"
  else # search package name
    db="$DESTDIR/packages.txt"
  fi

  grep "$@" "$db" \
    | grep -v obsolete \
    | awk -F '|' '{printf("%s | %s\t| %s\n",$1, $2, $3)}'
  # $? would be the status of awk, which succeeds even without input.
  # The second stage fails exactly when no line is left to print.
  if [ "${PIPESTATUS[1]}" -gt 0 ]; then
    msg "no package/file found"
    return 1
  fi
}

# Search like ctan_search does, show the hits and download them after
# the user has confirmed.
#
# Arguments: [-file] <grep arguments...>
#            With -file every hit is fetched as is, relative to
#            $CTAN_ARCHIVE. Otherwise a hit is a package name and
#            <name>.zip is fetched from $CTAN_CONTRIB.
# Globals:   DESTDIR and CTAN_MIRROR are read; CTAN_ARCHIVE/CTAN_CONTRIB
#            are read after ctan_mirror_select has been called.
#            tmp.txt and tmp2.txt are written below $DESTDIR.
# Returns:   1 when the search finds nothing, 0 when the user declines

ctan_get()
{
  local RETFOUND="$DESTDIR/tmp2.txt"
  local RETDOWN="$DESTDIR/tmp.txt"
  local prompt suffix base download pkg

  # The search result is redirected into $DESTDIR, so the directory has to
  # exist before ctan_search runs; otherwise a first run fails right here.
  [ -d "$DESTDIR/" ] || ctan_update --first-time --use-cache

  if ! ctan_search "$@" > "$RETFOUND"; then
    msg "no package matches '$*'"
    return 1
  fi

  if [ "x$1" = "x-file" ]; then
    shift
    ctan_grep_type='file'
    prompt=":: Are you sure you want to download (y/N)? "
    suffix=""
  else
    ctan_grep_type='pkg'
    prompt="Are you sure you want to download (y/N)? "
    suffix=".zip"
  fi

  cat "$RETFOUND"
  # The third column of the listing is what gets downloaded.
  awk -F'|' '{print $3}' "$RETFOUND" > "$RETDOWN"

  read -r -n 1 -p "$prompt" download
  echo ''
  [[ "x$download" == "xy" ]] || return 0

  # Pick the base URL only now: ctan_mirror_select rewrites both globals.
  ctan_mirror_select "$CTAN_MIRROR"
  if [ "$ctan_grep_type" = "file" ]; then
    base="$CTAN_ARCHIVE"
  else
    base="$CTAN_CONTRIB"
  fi

  # The list is read on descriptor 3 so that stdin stays untouched; read
  # also strips the blank that precedes each name in the listing.
  while read -r pkg <&3; do
    [ -n "$pkg" ] || continue
    msg "downloading ${pkg}${suffix}..."
    wget -nv -c "$base/${pkg}${suffix}"
  done 3< "$RETDOWN"
}

#
# awk script to join lines in FILES.byname
# some server wraps long lines hence we need this script
#
# Arguments: files to read; standard input is read when none are given
# Outputs:   one line per record. A record starts at a line that begins
#            with a digit; every following line that does not is glued to
#            it without a separator.
#
ctan_joiner()
{
  awk '
    # A line starting with digits opens a new record: flush the old one.
    /^[0-9]+/ {
      if (pending != "") print pending
      pending = $0
      next
    }
    # Anything else is the wrapped tail of the record being collected.
    {
      pending = pending $0
    }
    # The last line has already been collected above, so only flush here.
    END {
      if (pending != "") print pending
    }' \
    "$@"
}

# Print the complete description of the tool: purpose, version history,
# the usage text (through ctan_usage), todo list, credits, license.
# Every entry of the two tables below becomes one msg line.
ctan_about()
{
  local line
  local -a before_usage=(
    "ABOUT"
    "\tThis jifty tool helps you to seach/download LaTeX packages listed at"
    "\t\t$CTAN_CONTRIB"
    "\tor search/download any files in $CTAN_ARCHIVE (or its mirror)."
    ""
    "\tThe tool requires following programs"
    "\t\t$TOOLS"
    ""
    "\tThis tool was written by Anh K. Huynh <[email protected]>."
    "VERSION"
    "\t1.0.0 2008/05/11: first version"
    "\t1.1.0 2008/05/15: search files (use cache file)"
    "\t1.2.0 2008/05/16: smaller cache. Thanks to Karl Berry <[email protected]>"
    "\t1.3.0 2008/05/23: everything is cached. Faster. New options. Thanks to texer."
    "\t1.4.0 2009/09/12: auto. select ctan mirror (suggested by Jim Hefferon)"
    "\t1.5.0 2009/09/13: load database from http://mirror.ctan.org/FILES.byname"
  )
  local -a after_usage=(
    "TODO"
    "\t* package information supported"
    "\t* search by package description,..."
    "\t* package build script"
    "THANKS to"
    "\t* Karl Berry <[email protected]>"
    "\t* Nguyen Van Hanh <[email protected]>"
    "\t* Jim Hefferon <[email protected]>"
    "LICENSE"
    "\tThis tool is published under LPPL."
    "BUGS"
    "\tPlease report to kyanh <[email protected]>."
  )

  for line in "${before_usage[@]}"; do
    msg "$line"
  done

  ctan_usage

  for line in "${after_usage[@]}"; do
    msg "$line"
  done
}

# Print the command overview and a few examples.
# Every entry of the table below becomes one msg line; an empty entry
# yields a line that holds only the msg prefix.
ctan_usage()
{
  local line
  local -a usage_lines=(
    "USAGE"
    "\tctan about           : show all information about this tool"
    "\tctan usage           : show usage"
    "\tctan version         : show script version"
    ""
    "\tctan update          : update using $VIETTUG_CACHEDIR/cache.tbz (~1MB)"
    "\tctan update --direct : update directly from CTAN. You are going to download 8MB"
    "\tctan grep <string>   : search packages match <string>. grep ability is supported"
    "\tctan get  <string>   : download packages match <string> to working directory"
    ""
    "\tIf you want to search files:"
    ""
    "\tctan fgrep <string>  : search files."
    "\tctan fget  <string>  : download files match <string> to working directory"
    "\tctan grep -file <string>"
    "\tctan get  -file <string>"
    "EXAMPLES"
    "\tctan grep theorem    # search packages match 'theorem'"
    "\tctan grep ^n         # search packages srated by 'n'"
    "\tctan get  ^n         # download packages started by 'n'"
    ""
    "\tctan get -file contrib/ntheorem.zip"
    "\tctan fget      contrib/ntheorem.zip"
    "\tctan fgrep     ntheorem | grep zip"
  )

  for line in "${usage_lines[@]}"; do
    msg "$line"
  done
}

# Make sure a search term was given.
# A leading -file is skipped first; when nothing (or an empty string) is
# left, "missing parameter" is reported and the script exits with 1.
ctan_arg()
{
  case "x$1" in
    "x-file") shift ;;
  esac

  [ -n "$1" ] && return 0

  msg "missing parameter"
  exit 1
}

# Decide whether the dependency check runs before the sub-command.
# It is skipped when "x<sub-command>", used as a grep pattern, matches
# anywhere in the list below; this is also the case without any argument.

not_check="xusage:xhelp:xabout:xdoc:xversion:xcheck:"
if ! echo "$not_check" | grep -q "x$1"; then
  ctan_check_all
fi

# main program
#
# Dispatch on the first argument (the sub-command). The remaining
# arguments are forwarded as "$@", so a search term keeps its spaces and
# is not glob-expanded on the way to the handler. Without a sub-command
# the usage text is shown; an unknown one is reported and fails.

case "x$1" in
"xupdate")
 shift
 ctan_update "$@"
 ;;
"xfgrep")
 shift
 ctan_arg "$@"
 ctan_search -file "$@"
 ;;
"xgrep")
 shift
 ctan_arg "$@"
 ctan_search "$@"
 ;;
"xget")
 shift
 ctan_arg "$@"
 ctan_get "$@"
 ;;
"xinfo")
 shift
 ctan_info
 ;;
"xfget")
 shift
 ctan_arg "$@"
 ctan_get -file "$@"
 ;;
"x")  ctan_usage;;
"xusage") ctan_usage;;
"xhelp") ctan_usage;;
"xdoc") ctan_about;;
"xabout") ctan_about;;
"xversion") msg '$Id: ctan 82 2009-09-30 01:51:59Z pi $';;
"xcheck") ctan_check_all;;
*)
 msg "wrong parameter. please try 'ctan usage'"
 # An unknown sub-command is an error for the caller as well.
 exit 1
 ;;
esac