### ====================================================================
###  @Awk-file{
###     author          = "Nelson H. F. Beebe",
###     version         = "0.01",
###     date            = "01 December 2001",
###     time            = "11:31:47 MST",
###     filename        = "lstohtml.awk",
###     address         = "Center for Scientific Computing
###                        Department of Mathematics
###                        University of Utah
###                        Salt Lake City, UT 84112
###                        USA",
###     telephone       = "+1 801 581 5254",
###     FAX             = "+1 801 581 4148",
###     URL             = "http://www.math.utah.edu/~beebe",
###     checksum        = "54804 358 1158 10175",
###     email           = "beebe@math.utah.edu (Internet)",
###     codetable       = "ISO/ASCII",
###     keywords        = "HTML, UNIX directory listing, UNIX file listing",
###     supported       = "yes",
###     docstring       = "Convert a UNIX `ls -l' directory listing to
###                        HTML, with hypertext links for every file.
###                        This program will also handle the abbreviated
###                        directory listings used in our local ftp
###                        tree, such as 00last*.lst.
###
###                        For security reasons, all files that are inside
###                        world-unreadable directories, and files that
###                        are not world-readable, are excluded from the
###                        output.
###
###                        Usage:
###                              awk -f lstohtml.awk [INDENT=n] PREFIX=xxx \
###                                      ls-l-listing >ls-l.html
###
###                        The command-line option INDENT=n may be used
###                        to set the number of spaces of indentation
###                        for each level of HTML.  The default is INDENT=4.
###
###                        The command-line option PREFIX=xxx must be
###                        used to supply a suitable prefix to filenames
###                        recorded in HREF assignments in order to
###                        locate files.  If it is omitted, the
###                        hypertext links created cannot be followed.
###
###                        The checksum field above contains a CRC-16
###                        checksum as the first value, followed by the
###                        equivalent of the standard UNIX wc (word
###                        count) utility output of lines, words, and
###                        characters.
###                        This is produced by Robert
###                        Solovay's checksum utility.",
###  }
### ====================================================================

# ----------------------------------------------------------------------
# Main program: one-time setup, then a cascade of pattern rules.  Every
# rule that recognizes its line ends with "next", so the final catch-all
# rule sees only lines that nothing else claimed.
# ----------------------------------------------------------------------

BEGIN {
    VERSION = "0.01"            # these MUST match version
    DATE = "[01-Dec-2001]"      # and date above

    # Mode field of an `ls -l' line: one file-type character, nine
    # permission characters, and optionally the alternate-access-method
    # marker (`.', `+' or `@') that newer ls implementations append.
    # Lines that fail this match fall through to the catch-all rule,
    # which does NOT apply the world-readable filter, so the pattern
    # must accept every mode string that ls can produce.  It is built
    # from pieces to avoid regular-expression interval syntax, which not
    # all awk implementations support.
    PERM_CHAR = "[rwxlsStT-]"
    MODE_PATTERN = "^[bcdlps-]" \
        PERM_CHAR PERM_CHAR PERM_CHAR \
        PERM_CHAR PERM_CHAR PERM_CHAR \
        PERM_CHAR PERM_CHAR PERM_CHAR \
        "[.+@]?$"

    initialize()
}

NR == 1 {
    # FILENAME has no defined value inside BEGIN, so the input file name
    # can only be picked up once the first record has been read.  A
    # FILE=xxx command-line assignment still takes precedence.
    if (FILE == "")
        FILE = FILENAME
    if (FILE == "-")            # standard input has no useful name
        FILE = ""
    html_banner()
    html_header()
}

# "dirname:" heading from `ls -lR'
(NF == 1) && ($1 ~ /:$/)        { new_directory($1); next }

# "# Directory: dirname" heading from the abbreviated 00last*.lst format
(NF >= 3) && ($0 ~ /^# Directory: /) { new_directory_2($3); next }

# "total nnn" line at the top of each directory listing
(NF == 2) && ($1 == "total")    { print_total(3,make_escape_sequences($0)); next }

# ordinary `ls -l' entry
$1 ~ MODE_PATTERN               { print_ls_line(3); next }

/^ *$/                          { next } # ignore blank lines

# comment lines are data, not markup, so they must be escaped too
/^ *#/                          { print_comment(make_escape_sequences($0)); next }

# print everything else
                                { print_prefix(3,make_escape_sequences($0)); next }

END {
    if (NR == 0)                # empty input: still emit a complete document
    {
        html_banner()
        html_header()
    }
    html_trailer()
}


# Start diverting print_prefix() output into save_line[] instead of
# writing it immediately.
function begin_save()
{
    save_mode = 1
}


# Stop diverting output, write every saved line in its original order,
# and empty the buffer.
function end_save(    k)
{
    save_mode = 0
    for (k = 1; k <= lineno; ++k)
        print save_line[k]
    lineno = 0
}


# Open a preformatted-text environment, first closing any that is
# already open so that the tags always balance.
# NOTE(review): the tag literal was missing from the source text (all
# markup had been stripped); <PRE> is reconstructed from the function's
# purpose -- confirm against a pristine copy.
function begin_verbatim()
{
    end_verbatim()
    print_prefix(0,"<PRE>")
    in_verbatim = 1
}


# ----------------------------------------------------------------------
# NOTE(review): in the text these functions were recovered from, every
# HTML tag and character entity inside a string literal had been
# stripped.  The markup below is reconstructed from the surrounding
# logic (indentation levels, variables that are computed but otherwise
# unused, and the names of the functions).  The structure is reliable;
# the exact wording of the banner comments, the DOCTYPE, and the rule
# emitted by html_trailer() should be confirmed against a pristine copy.
# ----------------------------------------------------------------------

# Close the preformatted-text environment, if one is open.
function end_verbatim()
{
    if (in_verbatim)
    {
        print_prefix(0,"</PRE>")
        in_verbatim = 0
    }
}


# Write the leading HTML comments that identify the file, the program
# version, and who generated it and when.  These go straight to the
# output (not through print_prefix), so they are never buffered.
function html_banner()
{
    print "<!-- -*-html-*- -->"         # for GNU Emacs mode selection
    if (FILE != "")
        print "<!-- " FILE " -->"
    print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
    print "<!-- ls -l output to HTML by lstohtml version " VERSION " " DATE " -->"
    print "<!-- on " the_time " -->"
    print "<!-- for " the_person " (" the_user "@" the_host ") -->"
    if (FILE != "")
        print "<!-- Input file: " FILE " -->"
    print ""
}


# Write the document head and the top-level heading, then switch to
# save mode so that the body is held back until the directory index
# can be printed ahead of it (see html_trailer).
function html_header()
{
    sub(/\/$/,"",PREFIX)                # remove any trailing slash

    print_prefix(0,"<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">\n")
    print_prefix(0,"<HTML>")
    print_prefix(1,"<HEAD>")
    print_prefix(2,"<TITLE>")
    print_prefix(3,"Index of files at " make_escape_sequences(PREFIX))
    print_prefix(2,"</TITLE>")
    print_prefix(2,"<LINK REV=\"made\" HREF=\"mailto:" \
        make_escape_sequences(the_user "@" the_host) "\">")
    print_prefix(1,"</HEAD>")
    print_prefix(1,"<BODY>")
    print_prefix(2,"<H1>")
    print_prefix(3,"Index of files at " make_escape_sequences(PREFIX))
    print_prefix(2,"</H1>")
    begin_save()
}


# Finish the document: print the directory index, then the saved body,
# then the closing tags.  The order of the first three calls matters:
# print_directory() closes the body's <PRE> *into the save buffer*
# before it temporarily turns saving off.
function html_trailer()
{
    print_directory()
    end_save()
    end_verbatim()
    print_prefix(2,"<HR>")
    print_prefix(1,"</BODY>")
    print_prefix(0,"</HTML>")
}


# Run a shell command, return its first line of output (or "" if there
# is none), and close the pipe so that no descriptor is left open.
function run_command(cmd,    result)
{
    result = ""
    cmd | getline result
    close(cmd)
    return (result)
}


# One-time setup: defaults for the command-line variables, state
# variables, and the host/time/user details recorded in the banner.
function initialize(    lookup)
{
    # FILENAME normally has no value yet when this runs from BEGIN, so
    # this default only helps on implementations that set it early.
    if (FILE == "")
        FILE = FILENAME
    if (FILE == "-")
        FILE = ""
    if (INDENT == "")
        INDENT = 4

    lineno = 0
    save_mode = 0
    in_verbatim = 0

    the_host = run_command("uname -n")
    if (the_host == "")
        the_host = run_command("hostname")
    the_time = run_command("date")
    the_user = run_command("echo \"$LOGNAME\"")

    # Look up the user's full name.  The login name is handed to the
    # inner awk as a quoted shell parameter and compared as a string, so
    # it is never spliced into shell or regular-expression syntax.
    lookup = "awk -F: '$1 == u { print $5; exit }' u=\"$LOGNAME\""
    the_person = run_command(lookup " /etc/passwd")
    if (the_person == "")
        the_person = run_command("ypcat passwd 2>/dev/null | " lookup " -")
}


# Return s with the four HTML-special characters replaced by entities.
# The ampersand must be handled first so that the entities introduced
# by the later substitutions are not themselves re-escaped.  "\\&" in
# the replacement text yields a literal ampersand.
function make_escape_sequences(s)
{
    gsub(/&/,"\\&amp;",s)
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)
    gsub(/\"/,"\\&quot;",s)
    return (s)
}


# Return s without a leading "./" or "/" and without trailing slashes.
# Headings and DIRECTORY_PROTECTION keys both go through here so that
# the two always agree.
function normalize_path(s)
{
    sub(/^[.]?\//,"",s)                 # delete leading "./" or "/"
    sub(/\/+$/,"",s)                    # delete trailing slash(es)
    return (s)
}


# Return 1 if directory dir has been seen in a listing with a mode that
# denies world read or search access, else 0.  A sticky directory
# (other-execute shown as "t") is searchable and therefore not hidden.
function directory_is_hidden(dir)
{
    return ((dir in DIRECTORY_PROTECTION) && \
            (DIRECTORY_PROTECTION[dir] !~ /r.[xt]$/))
}


# Return the escaped HREF value for name inside the current directory.
# An empty name yields the directory itself.
function make_href(name,    path)
{
    path = PREFIX
    if ((DIRECTORY != "") && (DIRECTORY != "."))
        path = path "/" DIRECTORY
    if (name != "")
        path = path "/" name
    return (make_escape_sequences(path))
}


# Return the position in line of the last character of blank-delimited
# field n (0 if n < 1).  Walking the fields, rather than searching for
# the field's text, is what keeps a size of "2001" from being mistaken
# for a year of "2001" later in the same line.
function field_end(line,n,    i,pos)
{
    pos = 0
    for (i = 1; i <= n; ++i)
    {
        if (!match(substr(line,pos + 1),/^[ \t]*[^ \t]+/))
            break
        pos += RLENGTH
    }
    return (pos)
}


# Handle a "dirname:" heading from `ls -lR': record the directory,
# and unless it is hidden, emit a heading with a named anchor and add
# an entry to the directory index.
function new_directory(s,    name)
{
    if (in_verbatim)
        end_verbatim()

    DIRECTORY = s
    sub(/:$/,"",DIRECTORY)              # strip trailing colon
    DIRECTORY = normalize_path(DIRECTORY)

    if (directory_is_hidden(DIRECTORY))
        return                          # hide protected directories

    name = make_escape_sequences(DIRECTORY)
    print_prefix(2,"<H2>")
    print_prefix(3,"<A NAME=\"" name "\">")
    directory[++directoryno] = "<A HREF=\"#" name "\">" name "/" "</A>"
    print_prefix(4,name "/")
    print_prefix(3,"</A>")
    print_prefix(2,"</H2>")
    begin_verbatim()
}


# Handle a "# Directory: dirname" heading from the abbreviated listing
# format: record the directory and print the heading as a comment line
# whose name is a link.
function new_directory_2(s)
{
    if (!in_verbatim)
        begin_verbatim()

    DIRECTORY = s
    sub(/:$/,"",DIRECTORY)              # strip trailing colon
    DIRECTORY = normalize_path(DIRECTORY)

    if (directory_is_hidden(DIRECTORY))
        return                          # hide protected directories

    print_comment("# Directory: <A HREF=\"" make_href("") "/\">" \
        make_escape_sequences(DIRECTORY) "/" "</A>")
}


# Return the indentation for the given nesting level: INDENT * level
# blanks, at most 60, and none at all inside preformatted text.
function prefix(level,    n)
{
    if (in_verbatim)
        return ("")
    n = int(INDENT * level)
    if (n > 60)
        n = 60
    if (n < 1)
        return ("")
    return (sprintf("%" n "s",""))
}


# Print s as a line of preformatted text.  The caller supplies s
# already in HTML form (it may contain markup).
function print_comment(s)
{
    if (!in_verbatim)
        begin_verbatim()
    print_prefix(0,s)
}


# Print the index of directories collected by new_directory().  This is
# called while output is still being saved, so saving is suspended to
# get the index out ahead of the buffered body.
function print_directory(    k,old_save_mode)
{
    if (directoryno > 0)
    {
        end_verbatim()          # [01-Dec-2001] added to fix <PRE>...</PRE>
                                # imbalance bug; this closing tag still
                                # goes into the save buffer
        old_save_mode = save_mode
        save_mode = 0
        begin_verbatim()
        for (k = 1; k <= directoryno; ++k)
            print_prefix(3,directory[k])
        end_verbatim()
        save_mode = old_save_mode
    }
}


# Print the current `ls -l' record with its filename turned into a
# link, unless the file or its directory is not world-accessible.
# Directory entries also record their mode in DIRECTORY_PROTECTION so
# that their own listings can be suppressed later.
function print_ls_line(level,    filename,k,m,symlink,mode,type,key)
{
    if (!in_verbatim)           # [01-Dec-2001] added to fix <PRE>...</PRE>
        begin_verbatim()        # imbalance bug

    mode = $1
    type = substr(mode,1,1)

    # Find the end of the time/date field before the filename; the
    # filename itself may be several fields if it contains blanks.
    # (A name with an embedded year- or time-like word still confuses
    # this search, as it always has.)
    for (k = NF - 1; k > 0; --k)
    {
        if ($k ~ /^[0-9][0-9]?:[0-9][0-9]$/)    # then hh:mm field
            break
        else if ($k ~ /^[12][0-9][0-9][0-9]$/)  # then year field
            break
    }
    if (k < 1)
        k = NF - 1              # no date found: take the last field as name
    if (k < 1)
        return                  # nothing but a mode string on this line

    m = field_end($0,k) + 2     # first character of the filename
    filename = substr($0,m)
    sub(/^[.]\//,"",filename)   # remove any "./" prefix

    if (type == "d")            # directory file
    {
        key = normalize_path(DIRECTORY "/" filename)
        if (directory_is_hidden(DIRECTORY))
            # everything below a hidden directory stays hidden
            DIRECTORY_PROTECTION[key] = DIRECTORY_PROTECTION[DIRECTORY]
        else
            DIRECTORY_PROTECTION[key] = substr(mode,1,10)
        if (substr(mode,8,3) !~ /^r.[xt]$/)     # hide unreadable directories
            return
    }

    if (substr(mode,8,1) != "r")        # ignore files that are not world
        return                          # readable: they would not be
                                        # accessible anyway

    if (directory_is_hidden(DIRECTORY))
        return                  # hide files that are in protected directories

    if (type ~ /[d-]/)          # then ordinary file or directory
        print_prefix(level,make_escape_sequences(substr($0,1,m-1)) \
            "<A HREF=\"" make_href(filename) "\">" \
            make_escape_sequences(filename) "</A>")
    else if (type == "l")       # symbolic link: "name -> target"
    {
        k = index(filename," -> ")
        if (k > 0)
        {
            symlink = substr(filename,1,k - 1)  # name only, no trailing blank
            filename = substr(filename,k)       # " -> target"
        }
        else
        {
            symlink = filename
            filename = ""
        }
        print_prefix(level,make_escape_sequences(substr($0,1,m-1)) \
            "<A HREF=\"" make_href(symlink) "\">" \
            make_escape_sequences(symlink) "</A>" \
            make_escape_sequences(filename))
    }
    else                        # unrecognized ls line
        print_prefix(level,make_escape_sequences($0))
}


# Emit one output line at the given indentation level, either at once
# or into the save buffer when save mode is on.
function print_prefix(level,line)
{
    if (save_mode)
        save_line[++lineno] = prefix(level) line
    else
        print prefix(level) line
}


# Print a "total nnn" line unless it belongs to a hidden directory.
function print_total(level,line)
{
    if (directory_is_hidden(DIRECTORY))
        return                  # hide information about protected directories
    print_prefix(level,line)
}