### ====================================================================
###  @Awk-file{
###     author          = "Nelson H. F. Beebe",
###     version         = "0.01",
###     date            = "01 December 2001",
###     time            = "11:31:47 MST",
###     filename        = "lstohtml.awk",
###     address         = "Center for Scientific Computing
###                        Department of Mathematics
###                        University of Utah
###                        Salt Lake City, UT 84112
###                        USA",
###     telephone       = "+1 801 581 5254",
###     FAX             = "+1 801 581 4148",
###     URL             = "http://www.math.utah.edu/~beebe",
###     checksum        = "54804 358 1158 10175",
###     email           = "beebe@math.utah.edu (Internet)",
###     codetable       = "ISO/ASCII",
###     keywords        = "HTML, UNIX directory listing, UNIX file listing",
###     supported       = "yes",
###     docstring       = "Convert a UNIX `ls -l' directory listing to
###                        HTML, with hypertext links for every file.
###                        This program will also handle the abbreviated
###                        directory listings used in our local ftp
###                        tree, such as 00last*.lst.
###
###                        For security reasons, all files that are inside
###                        world-unreadable directories, and files that
###                        are not world-readable, are excluded from the
###                        output.
###
###                        Usage:
###                             awk -f lstohtml.awk [INDENT=n] PREFIX=xxx \
###                                     ls-l-listing >ls-l.html
###
###                        The command-line option INDENT=n may be used
###                        to set the number of spaces of indentation
###                        for each level of HTML.  The default is INDENT=4.
###
###                        The command-line option PREFIX=xxx must be
###                        used to supply a suitable prefix to filenames
###                        recorded in HREF assignments in order to
###                        locate files.  If it is omitted, the
###                        hypertext links created cannot be followed.
###
###                        The checksum field above contains a CRC-16
###                        checksum as the first value, followed by the
###                        equivalent of the standard UNIX wc (word
###                        count) utility output of lines, words, and
###                        characters.
###                        This is produced by Robert
###                        Solovay's checksum utility.",
###  }
### ====================================================================

# ----------------------------------------------------------------------
# Main program: one-time setup, then one rule per kind of input line.
# Every rule ends in "next", so at most one rule handles a given line.
# ----------------------------------------------------------------------

BEGIN {
    VERSION = "0.01"                    # these MUST match version
    DATE = "[01-Dec-2001]"              # and date above
    initialize()
}

# Emit the banner and header once, when the first input line arrives.
NR == 1 {
    html_banner()
    html_header()
}

# "name:" on a line by itself
(NF == 1) && ($1 ~ /:$/)                { new_directory($1); next }

# "# Directory: name" (must be tested before the generic comment rule)
(NF >= 3) && ($0 ~ /^# Directory: /)    { new_directory_2($3); next }

# "total nnn"
(NF == 2) && ($1 == "total")            { print_total(3,$0); next }

# A line whose first field is a 10-character ls -l mode string
$1 ~ /^[cdl-][rwxlsSt-][rwxlsSt-][rwxlsSt-][rwxlsSt-][rwxlsSt-][rwxlsSt-][rwxlsSt-][rwxlsSt-][rwxlsSt-]$/ \
                                        { print_ls_line(3); next }

/^ *$/                                  { next }    # ignore blank lines

/^ *#/                                  { print_comment($0); next }

# print everything else
                                        { print_prefix(3,make_escape_sequences($0)); next }

END                                     { html_trailer() }

# ----------------------------------------------------------------------
# begin_save(): switch print_prefix() into save mode, so that subsequent
# output is accumulated in save_line[] instead of being printed.
# ----------------------------------------------------------------------
function begin_save()
{
    save_mode = 1
}

# ----------------------------------------------------------------------
# end_save(): leave save mode, print every line accumulated in
# save_line[1..lineno] in order, and reset the line counter.
# k is a local loop index.
# ----------------------------------------------------------------------
function end_save(    k)
{
    save_mode = 0
    for (k = 1; k <= lineno; ++k)
        print save_line[k]
    lineno = 0
}

# ----------------------------------------------------------------------
# begin_verbatim(): open a verbatim section, first closing any section
# that is still open so that sections never nest.
#
# NOTE(review): in the damaged copy of this file the string literal here
# was split by a raw newline (a syntax error) and the one in
# end_verbatim() was empty.  The <PRE> ... </PRE> pair has been restored
# on the evidence of the function names and of the "...imbalance bug"
# comments elsewhere in the file -- confirm against a pristine copy.
# ----------------------------------------------------------------------
function begin_verbatim()
{
    end_verbatim()
    print_prefix(0,"<PRE>")
    in_verbatim = 1
}

# ----------------------------------------------------------------------
# end_verbatim(): close the current verbatim section, if one is open;
# otherwise do nothing.  Must stay paired with begin_verbatim().
# ----------------------------------------------------------------------
function end_verbatim()
{
    if (in_verbatim)
    {
        print_prefix(0,"</PRE>")
        in_verbatim = 0
    }
}
# ----------------------------------------------------------------------
# html_banner(): print the leading banner lines of the output.  It is
# called once, immediately before html_header().
#
# Globals read:
#     FILE    when non-empty, two additional lines are printed.
#
# Output goes straight to print, not through print_prefix(), so it is
# never diverted into the save buffer.
#
# NOTE(review): every string printed here is empty in this copy, yet the
# first one carries a "GNU Emacs mode selection" remark and two of them
# are conditional on FILE.  Presumably the original text (markup-like
# banner lines) was stripped when this copy was made -- TODO restore
# from a pristine lstohtml.awk.
# ----------------------------------------------------------------------
function html_banner()
{
print "" # for GNU Emacs mode selection
# extra banner line only when FILE is set
if (FILE != "")
print ""
print ""
print ""
print ""
print ""
# second FILE-dependent banner line
if (FILE != "")
print ""
print ""
}
# ----------------------------------------------------------------------
# html_header(): normalize PREFIX and emit the opening lines of the
# document through print_prefix().
#
# NOTE(review): this function is truncated in this copy.  The literals
# below are empty (presumably stripped markup), and the body originally
# continued with an unterminated call beginning
#     print_prefix(2,"
# whose remainder is lost.  The function is closed here so that the file
# parses -- TODO restore the missing statements from a pristine copy.
# ----------------------------------------------------------------------
function html_header()
{
    sub(/\/$/,"",PREFIX)                # remove any trailing slash
    print_prefix(0,"\n")
    print_prefix(0,"")
    print_prefix(1,"")
}

# ----------------------------------------------------------------------
# NOTE(review): damaged region.  Everything between the truncated
# html_header() above and the fragment below is missing from this copy.
# The functions called but not defined in the visible text are
# initialize(), html_trailer(), new_directory(), new_directory_2(),
# print_comment(), prefix() and make_escape_sequences(); presumably they
# were defined in the lost region.  What follows is the orphaned tail of
# a function whose header and opening condition were lost.  It is kept
# verbatim, commented out, until it can be restored:
#
##      ...imbalance bug
##      old_save_mode = save_mode
##      save_mode = 0
##      begin_verbatim()
##      for (k = 1; k <= directoryno; ++k)
##          print_prefix(3,directory[k])
##      end_verbatim()
##      save_mode = old_save_mode
##      }
##  }
# ----------------------------------------------------------------------

# ----------------------------------------------------------------------
# print_ls_line(level): format the current input record ($0), an
# "ls -l" line, at the given level.
#
# Parameters:
#     level      passed through to print_prefix()
# Locals:
#     filename   text following the date/time field (may contain blanks)
#     k          field index / position of " -> " in a symbolic link
#     m          character offset in $0 where the filename begins
#     symlink    link-name part of a "name -> target" entry
#
# Globals read:     DIRECTORY, in_verbatim
# Globals written:  DIRECTORY_PROTECTION[] (mode string of each directory
#                   entry seen, keyed by DIRECTORY "/" filename)
#
# Nothing is printed for
#     * directories whose "other" permissions are not r.x,
#     * entries that are not world readable,
#     * entries inside a directory recorded as not world r.x.
#
# The last test previously consulted DIRECTORY_DIRECTORY_PROTECTION, an
# array that is never assigned, so it could never succeed and entries in
# protected directories were listed.  It now uses DIRECTORY_PROTECTION.
#
# NOTE(review): the empty "" literals in the print_prefix() calls sit
# where the hypertext link markup promised by the file header would be
# expected; presumably it was stripped from this copy -- TODO restore.
# NOTE(review): if no hh:mm or year field is found the loop ends with
# k == 0 and $k is the whole record; that case is not handled specially.
# ----------------------------------------------------------------------
function print_ls_line(level,    filename,k,m,symlink)
{
    if (!in_verbatim)                   # [01-Dec-2001] added to fix
        begin_verbatim()                # ...imbalance bug

    for (k = NF - 1; k > 0; --k)        # find end of time/date field before filename
    {                                   # because filename may be multiple fields
                                        # if it contains blanks
        if ($k ~ /^[0-9][0-9]?:[0-9][0-9]$/)    # then hh:mm field
            break
        else if ($k ~ /^[12][0-9][0-9][0-9]$/)  # then year field
            break
    }

    m = index($0," " $k " ")            # blank just before the time/year field
    filename = substr($0,m + 1)         # time/year field onward
    m += 1 + index(filename," ")        # step over that field and one blank
    filename = substr($0,m)

    if (substr($0,1,1) == "d")          # directory file
    {
        DIRECTORY_PROTECTION[DIRECTORY "/" filename] = substr($0,1,10)
        ## print_prefix(0,"DEBUG 2: " filename " -> [" substr($0,1,10) "]")
        if (substr($0,8,3) !~ /^r.x$/)  # hide unreadable directories
            return
    }

    if (substr($0,8,1) != "r")          # ignore files that are not world readable
        return                          # since they would not be accessible anyway

    ## print_prefix(0,"DEBUG 3: " DIRECTORY " -> [" DIRECTORY_PROTECTION[DIRECTORY] "]")

    if ((DIRECTORY in DIRECTORY_PROTECTION) && \
        (DIRECTORY_PROTECTION[DIRECTORY] !~ /r.x$/))
        return                          # hide files that are in protected directories

    sub(/^[.]\//,"",filename)           # remove any "./" prefix

    if (substr($0,1,1) ~ "[d-]")        # then ordinary file or directory
        print_prefix(level,substr($0,1,m-1) "" \
                     make_escape_sequences(filename) "")
    else if (substr($0,1,1) == "l")     # symbolic link
    {
        k = index(filename," -> ")
        if (k > 0)
        {
            symlink = substr(filename,1,k)
            filename = substr(filename,k+1)
        }
        else
        {
            symlink = filename
            filename = ""
        }
        print_prefix(level,substr($0,1,m-1) "" \
                     make_escape_sequences(symlink) "" \
                     make_escape_sequences(filename) )
    }
    else                                # unrecognized ls line
        print_prefix(level,make_escape_sequences($0))
}

# ----------------------------------------------------------------------
# print_prefix(level,line): output line preceded by prefix(level).
# While save_mode is set the text is appended to save_line[] (printed
# later by end_save()) instead of being printed immediately.
# prefix() is not defined in the visible part of this file.
# ----------------------------------------------------------------------
function print_prefix(level,line)
{
    if (save_mode)
        save_line[++lineno] = prefix(level) line
    else
        print prefix(level) line
}

# ----------------------------------------------------------------------
# print_total(level,line): output a "total nnn" line, unless the current
# DIRECTORY has been recorded in DIRECTORY_PROTECTION[] with "other"
# permissions that are not r.x.
#
# As in print_ls_line(), the membership test previously named the
# never-assigned array DIRECTORY_DIRECTORY_PROTECTION and so never hid
# anything; it now consults DIRECTORY_PROTECTION.
# ----------------------------------------------------------------------
function print_total(level,line)
{
    if ((DIRECTORY in DIRECTORY_PROTECTION) && \
        (DIRECTORY_PROTECTION[DIRECTORY] !~ /r.x$/))
        return                          # hide information about protected directories

    print_prefix(level,line)
}