# /u/sy/beebe/public_html/fonts/fonts-to-vendors.awk, Fri Jul 3 08:48:13 1998 # Edit by Nelson H. F. Beebe # ======================================================================== # Scan the *-fontnames.html files and create a set of HTML files that # contain sorted tables of fonts, relating font names to vendors. # # Separate files are used for each initial letter, both to reduce the # size of the HTML file to be loaded (>440KB otherwise, down to 1KB to # 80KB with this splitting), and to improve browser performance, which # proves to be very slow for large tables. # # Usage: # gawk -f fonts-to-vendors.awk *-fontnames.html # # The output files are named: # # fonts-to-vendors.html # fonts-to-vendors-0.html # ... # fonts-to-vendors-9.html # fonts-to-vendors-a.html # ... # fonts-to-vendors-z.html # # but entries for fontname initials with no fonts are suppressed. # # It is assumed that for every VENDOR-fontnames.html file, there is a # corresponding VENDOR.html file. The lists of fonts and vendors are # determined automatically, although there is special case code below in # process() to handle vendor names containing special characters or # multiple uppercase letters. # # [22-Aug-1998] -- Tighten code in add_vendor() that discards names # beginning with a period, so that lists of file # extensions are not interpreted as font names (e.g., # in bitstream-fonts-by-name.html). # # [05-Aug-1998] -- Fix error in reduction of hyphenated filenames that # reduced y-and-y to y, as well as the intended # adobe-type-on-call-4-1 to adobe. # # Modify add_vendor() to first check for a valid # initial, and only then, to add the font to the # index, eliminating an erroneous empty item for the # vendor directory resulting from the list of file # extensions in bitstream-fonts-by-name.html. # # Add diagnostic in process() if the reduction rules # that derive a vendor name from a file name produce # an empty string. 
# # [09-Jul-1998] -- Update to support font names beginning with digits. # Add support for listing only those vendors used in # each section. # # Add print_software(). # # Add master vendor index in top-level # font-names-by-vendor.html file. # # [08-Jul-1998] -- Update to handle font names with spaces. # # [03-Jul-1998] -- Original version. # ======================================================================== BEGIN { initialize() } /
  • / { process() } END { print_entries() print_directory() print_vendor_directory("") print_software() end_html() } # ======================================================================== function add_vendor(font,vendor, k,n,parts) { if ((font ~ "[ \t]*[.][a-z][a-z][a-z][^a-z]") || \ (font ~ "[ \t]*[.][a-z][a-z][a-z]$")) ; # ignore .afm, .pfb, ... list items in bitstream-fonts-by-name.html else if (index("0123456789abcdefghijklmnopqrstuvwxyz",tolower(substr(font,1,1))) == 0) print FILENAME ":" FNR ": bad initial: [" font "] for vendor [" vendor "]" >"/dev/stderr" else { if (font in Fontlist) { # Some vendors (e.g., URW) offer the same font in multiple # collections. We therefore only add the vendor if it is not # already in the list. split(Fontlist[font],parts,SUBSEP) n = split(parts[2],parts,", ") for (k = 1; k <= n; ++k) { if (parts[k] == vendor) return } Fontlist[font] = Fontlist[font] ", " vendor } else Fontlist[font] = font SUBSEP vendor } } function begin_html(title) { print_line("\n") print_line("") print_line("") print_line("\n") print_line("\n") print_line("") print_line(" ") print_line(" ") print_line(" " title) print_line(" ") print_line("") print_line(" ") print_line(" ") print_line("

    ") print_line(" " title) print_line("

    ") print_line("

    ") print_line(" Last update: " Current_Date_and_Time "") print_line("

    ") } function end_html() { print_line(" ") print_line("") close(Outfile) } function initialize() { "date" | getline Current_Date_and_Time close("date") if (USER == "") { USER = shell_command("whoami") if (USER == "") { if ("USER" in ENVIRON) UserName = ENVIRON["USER"] else if ("LOGNAME" in ENVIRON) UserName = ENVIRON["LOGNAME"] else UserName = "unknown" } else UserName = USER } else UserName = USER if (HOST == "") HostName = shell_command_with_default("hostname", \ ((("HOST" in ENVIRON) && (ENVIRON["HOST"] != "") ? ENVIRON["HOST"] : "unknown"))) else HostName = HOST # [10-Feb-1999] at the author's site, we prefer mail addresses to # be user@math.utah.edu, instead of # user@workstation.math.utah.edu, so that they are tied, not to a # specific machine, but rather, to a longer-lived domain of # machines. However, allow the invoker to override this with a # command-line option "-v MAILHOST=someotherhost". # # Sadly, there is no portable way to determine the mailhost name: # UNIX systems that run BIND (most probably do) should have an # entry in /etc/resolv.conf that is a domain name, but that might # not work as a mailhost name at a few sites. if (MAILHOST == "") MAILHOST = shell_command_with_default("awk '/^[ \t]*(domain|search)[ \t]/ { print $2 }' /etc/resolv.conf", \ HostName) } function less(a,b) { # We want the font lists to be ordered independent of lettercase: return (tolower(a) < tolower(b)) } function partition(array,left,right, i,j,swap,v) { i = left - 1 j = right v = array[right] for (;;) { while (less(array[++i],v)) ; while (less(v,array[--j])) { if (j == left) break } if (i >= j) break swap = array[i] array[i] = array[j] array[j] = swap } swap = array[i] array[i] = array[right] array[right] = swap return (i) } function print_directory( k,letter,vendor_count) { vendor_count = 0 for (file in Vendors_by_Filename) vendor_count++ Outfile = "fonts-to-vendors.html" begin_html("Font names by vendor") print_upindex() print_line("

    ") print_line(" Index of font names") print_line("

    ") print_line("

    ") print_line(" It is sometimes useful to map a font name to a font vendor.") print_line(" Here is a directory of such mappings for " Font_Count " fonts from " vendor_count " vendors.") print_line(" Select a section according to the first character of the font name,") print_line(" ignoring letter case:") print_line("

    ") print_line("

    ") for (k = 1; k <= 36; ++k) { letter = substr("0123456789abcdefghijklmnopqrstuvwxyz",k,1) if (letter in Initials_Used) print_line(" " \ letter "" ((k < 36) ? " |" : "")) } print_line("

    ") } function print_entries( last_initial,k,key,n,next_initial,parts,sorted_fontlist) { n = 0 for (key in Fontlist) sorted_fontlist[++n] = Fontlist[key] Font_Count = n # global variable for use in print_directory() quicksort(sorted_fontlist,1,n) last_initial = "" for (k = 1; k <= n; ++k) { next_initial = tolower(substr(sorted_fontlist[k],1,1)) if (last_initial != next_initial) { if (last_initial != "") { print_line(" ") print_uplink() end_html() } Initials_Used[next_initial] = 1 Outfile = "fonts-to-vendors-" next_initial ".html" begin_html("Fonts " toupper(next_initial) "... by vendor") print_uplink() print_vendor_directory(next_initial) print_line("

    ") print_line(" Fonts and their vendors") print_line("

    ") print_line(" ") } split(sorted_fontlist[k],parts,SUBSEP) print_line("\t") last_initial = next_initial } print_line("
    " ((last_initial != next_initial) ? \ ("" parts[1] "") : parts[1]) \ "" parts[2] "
    ") print_uplink() end_html() } function print_line(line) { print line > Outfile } function print_software() { print_line("

    ") print_line(" Indexing software") print_line("

    ") print_line("

    ") print_line(" The software used to prepare this font index is freely available:") print_line("

    ") print_line(" ") print_line("

    ") print_line(" The html-ncheck program used in the Makefile") print_line(" for HTML validation is available in the sp-x.y.z binary") print_line(" distributions at") print_line(" ") print_line(" http://www.math.utah.edu/pub/sgml/.") print_line(" ") print_line("

    ") print_line("

    ") print_line(" Preparation and validation of this index takes only about 45 sec") print_line(" wall clock time on the author's Sun UltraSPARC 170 workstation.") print_line(" An automated nightly make run ensures that it is kept up-to-date") print_line(" if other files in this directory are updated.") print_line("

    ") } function print_upindex() { print_line("

    ") print_line(" ") print_line(" Up to notes on fonts") print_line(" ") print_line("

    ") } function print_uplink() { print_line("

    ") print_line(" ") print_line(" Up to index of font names by vendor") print_line(" ") print_line("

    ") } function print_vendor_directory(this_initial, file,filenames_by_vendor, \ k,key,n,parts,sorted_vendors,vendors, \ vendors_used_in_this_section) { # First build vendors_used_in_this_section[], indexed by vendor if (this_initial == "") # fast case { # this_initial is an empty string, so all vendors are wanted. # We can compute that list faster than by scanning the entire # Fontlist and splitting it into vendor names. for (file in Vendors_by_Filename) vendors_used_in_this_section[Vendors_by_Filename[file]] = 1 } else # slow case { # this_initial is a single character, so we have to examine # the entire Fontlist, split out the vendors used in this # section, and add them to vendors_used_in_this_section[] for (key in Fontlist) { if (tolower(substr(Fontlist[key],1,1)) == this_initial) { split(Fontlist[key],parts,SUBSEP) n = split(parts[2],vendors,", ") for (k = 1; k <= n; ++k) vendors_used_in_this_section[vendors[k]] = 1 } } } # Next build sorted_vendors[], an (still unsorted) list of # vendors indexed by an integer 1, 2, ..., n n = 0 for (file in Vendors_by_Filename) { if (Vendors_by_Filename[file] in vendors_used_in_this_section) { sorted_vendors[++n] = Vendors_by_Filename[file] filenames_by_vendor[Vendors_by_Filename[file]] = file } } quicksort(sorted_vendors,1,n) print_line("

    ") print_line(" Directory of cited vendors") print_line("

    ") print_line(" ") } function process( font,n,vendor,vendor_filename) { # The code here assumes that fontname entries are # prettyprinted like this: #
  • # ACaslon-AltBold #
  • getline if ($0 ~ "<[A-Z]") # ignore tables-of-contents entries return # and anything else beginning with a tag font = $0 # should be $1, but URW font names on the Web have embedded spaces gsub(/^ +/,"",font) # trim leading and gsub(/ +$/,"",font) # trailing space gsub(/ +/," ",font) # remap runs of spaces to single spaces # gsub(/ +/,"-",font) # remap runs of spaces to hyphens gsub(/Ä/,"Ae",font) # some URW fonts have umlauts, but gsub(/Ö/,"Oe",font) # other vendors map these to digraphs gsub(/Ü/,"Ue",font) # so we do as well. This also repairs gsub(/ä/,"ae",font) # a sorting problem that otherwise gsub(/ö/,"oe",font) # occurs because of the SGML "&name;" gsub(/ü/,"ue",font) # entities. vendor = FILENAME sub("[.]html$","",vendor) sub("-fontnames$","",vendor) sub("adobe-.*$","adobe",vendor) # reduce adobe-type-on-call-4-1 to adobe sub("mathematica-.*","mathematica",vendor) vendor_filename = vendor ".html" # Handle some special cases sub("bluesky","Blue Sky Research",vendor) sub("itf","ITF",vendor) sub("urw","URW",vendor) sub("tigers-type-specialists","Tiger's Type Specialists",vendor) sub("y-and-y","Y\\&Y",vendor) if (length(vendor) > 0) { vendor = toupper(substr(vendor,1,1)) substr(vendor,2) Vendors_by_Filename[vendor_filename] = vendor add_vendor(font,vendor) } else print FILENAME ":" FNR ": filename reduction produced empty vendor name" >"/dev/stderr" } function quicksort(array,left,right, i) { # The code in partition() and quicksort() is a direct translation # of the simple quicksort algorithm given in Robert Sedgewick's # ``Algorithms in C'', 3rd edition, Addison-Wesley, 1998, # pp. 305--307. We need an O(N lg N) algorithm here instead of a # simpler O(N^2) algorithm because the font list has thousands of # entries. There are many things that one can do to tweak # quicksort() to make its worst-case behavior of O(N^2) unlikely, # and to improve its performance on small sequences by switching # to other sorting algorithms. 
# However, we do not attempt any of those refinements here.
#
# The user-defined less(a,b) function conceals the details of how
# array items are compared.

    if (right <= left)
        return
    i = partition(array,left,right)
    quicksort(array, left, i - 1)
    quicksort(array, i + 1, right)
}

function shell_command(command, s)
{
    # Run a shell command and return its first output line, after
    # closing the pipe.
    #
    #   command -- shell command line to execute
    #   s       -- local scratch variable; callers must not supply it
    #
    # If getline reads nothing, s is never assigned and the function
    # returns an empty string.

    command | getline s
    close(command)
    return (s)
}

function shell_command_with_default(command,fallback, s)
{
    # Run a shell command and return its first output line, or if that
    # is empty, return the fallback argument.  The pipe is closed by
    # shell_command() before this function returns.
    #
    #   command  -- shell command line to execute
    #   fallback -- value returned when the command yields an empty line
    #   s        -- local scratch variable; callers must not supply it
    #
    # The second parameter was formerly named "default".  That word is
    # reserved in gawk (switch/case/default), and using it as a
    # parameter name is a syntax error there, so it has been renamed.
    # awk arguments are positional, so existing callers are unaffected.

    s = shell_command(command)
    return ((s == "") ? fallback : s)
}