# /u/sy/beebe/tex/bib/ref2bib.awk, Tue Feb 20 07:46:24 1996 # Edit by Nelson H. F. Beebe # ======================================================================== # Convert a UNIX bib/refer file to BibTeX. # # I developed this program for processing the USENIX bib/refer # bibliography from ftp://ftp.uu.net/library/bibliography, after # finding that r2b (in perl) failed with syntax errors, and # refer-to-bibtex.el went into an infinite loop. Significant parts # were adapted from oclc-to-bibtex.awk and melvyl-to-bibtex.awk. # # Usage: # nawk -f ref2bib.awk outfile # # @InProceedings{} entries are split into separate @InProceedings{} # and @Proceedings entries, and the latter may consequently be # duplicated in the output. This is trivially handled by application # of "bibsort -u". # # From # # man 5 addbib | grep '^ *%' # # we have the following bib/refer key descriptions: # # %A Author's name # %B Book containing article referenced # %C City (place of publication) # %D Date of publication # %E Editor of book containing article referenced # %F Footnote number or label (supplied by refer) # %G Government order number # %H Header commentary, printed before reference # %I Issuer (publisher) # %J Journal containing article # %K Keywords to use in locating reference # %L Label field used by -k option of refer # %M Bell Labs Memorandum (undefined) # %N Number within volume # %O Other commentary, printed at end of reference # %P Page number(s) # %Q Corporate or Foreign Author (unreversed) # %R Report, paper, or thesis (unpublished) # %S Series title # %T Title of article or book # %V Volume number # %X Abstract - used by roffbib, not by refer # %Y,Z Ignored by refer # # [27-Dec-1999] -- change pageswhole to bookpages # [29-Jul-1999] -- Add pattern for %G. # [20-Nov-1998] -- Add key descriptions in comment header. # Add support for %L, %R, %S, %U (private), %Y, and %Z. # Extend handling in convert_R() to handle two thesis # types in addition to reports. 
#                    Convert global variables to initial-caps naming
#                    convention.
#                    Modify fix_dashes() to require blanks around existing
#                    en-dashes before converting them to em-dashes.
#
# [17-Sep-1997] -- Fix small bug in strip_outer_nonletters() (missing
#                  caret in regexp).
#
# [01-Jun-1996] -- Original version.
# ========================================================================

BEGIN { initialize() }

# Main rule table: one rule per bib/refer key letter.  Each convert_X()
# stores the line's value into the global Entry[] array; print_entry()
# later flushes Entry[] as one BibTeX entry at each blank-line separator.
/^%A/   { convert_A(); next }
/^%B/   { convert_B(); next }
/^%C/   { convert_C(); next }
/^%D/   { convert_D(); next }
/^%E/   { convert_E(); next }
/^%G/   { convert_G(); next }
/^%I/   { convert_I(); next }
/^%J/   { convert_J(); next }           # special addition for OCR input
/^%K/   { convert_K(); next }
/^%L/   { convert_L(); next }
/^%M/   { convert_M(); next }           # special addition for OCR input
/^%N/   { convert_N(); next }
/^%O/   { convert_O(); next }
/^%P/   { convert_P(); next }
/^%R/   { convert_R(); next }
/^%S/   { convert_S(); next }
/^%T/   { convert_T(); next }
/^%U/   { convert_U(); next }           # URL: private extension
/^%V/   { convert_V(); next }
/^%W/   { convert_W(); next }
/^%X/   { convert_X(); next }
/^%Y/   { convert_Y(); next }
/^%Z/   { convert_Z(); next }
/^%/    { warning("Unrecognized line: [" $0 "]"); next }

# A blank (or underscore/dash-only) line terminates the current entry.
/^[ \t_\-]*$/ { print_entry(); next }

# Additional patterns to handle the recent USENIX publications noted
# in http://www.usenix.org/cgi-bin/sortbib.pl?-sD which has longer
# key:value pairs, and apparently no continuation lines, but
# otherwise, works much like bib/refer does.
/^ *<\/?[A-Z][A-Z0-9]*>/ { next }       # ignore HTML tags
/^ *Author:/      { remap(); convert_A(); next }
/^ *Date:/        { remap(); convert_D(); next }
/^ *Institution:/ { remap(); convert_W(); next }
/^ *Location:/    { remap(); convert_C(); next }
/^ *Num in Vol:/  { remap(); convert_N(); next }
/^ *Other:/       { remap(); convert_O(); next }
/^ *Pages:/       { remap(); convert_P(); next }
/^ *Proceedings:/ { remap(); convert_J(); next }
/^ *Publisher:/   { remap(); convert_I(); next }
/^ *Title:/       { remap(); convert_T(); next }
/^ *Unknown:/     { remap(); convert_X(); next }
/^ *Volume:/      { remap(); convert_V(); next }

# Anything else is a continuation of the previous line
{ append($0); next }

END { print_entry() }


# Append continuation text s to the most recently set Entry[] field,
# or file it under "unknown" if no field has been set yet.
function append(s)
{
    if (Last_Key in Entry)
        join(Last_Key, " ", s)
    else
        set("unknown", s)
}


# Return the author field if present, else the editor, else "Anonymous".
function author_editor()
{
    if ("author" in Entry)
        return (Entry["author"])
    else if ("editor" in Entry)
        return (Entry["editor"])
    else
        return ("Anonymous")
}


# Reduce an all-caps personal name to mixed case, word by word.
function capitalize(s, k,n,parts)
{
    if (is_corporate_name(s))           # leave corporate names alone
        return (s)
    if (s ~ /[A-Z][A-Z]/)               # Reduce HEARN, A. C. to Hearn, A. C.
    {                                   # but leave McMahon, L. E. unchanged
        n = split(s, parts, " ")
        for (k = 1; k <= n; ++k)
            s = (k == 1) ? capitalize_word(parts[k]) \
                         : s " " capitalize_word(parts[k])
    }
    return (s)
}


# Lowercase every uppercase letter that follows another letter, so that
# "HEARN" becomes "Hearn" while single initials are left untouched.
function capitalize_word(s, k)
{
    for (k = 1; k <= length(s); ++k)
    {
        if (isupper(substr(s, k, 1)) && (k > 1) && isletter(substr(s, k - 1, 1)))
            s = substr(s, 1, k - 1) tolower(substr(s, k, 1)) substr(s, k + 1)
    }
    return (s)
}


# Build a BibTeX citation tag of the form Lastname:year:ABC, where ABC
# abbreviates up to three significant title words.
function citation_tag( author,abbrev,k,n,parts,t,tag)
{
    author = author_editor()
    if (substr(author, 1, 1) == "{")    # have corporate author
    {
        split(author, parts, " ")       # "{SPSS, Inc."}
        t = substr(parts[1], 2)         # "SPSS,"
        gsub(/,/, "", t)                # reduce to "SPSS"
    }
    else
        t = capitalize(author)
    split(t, parts, " and ")
    t = parts[1]                        # need only first author name
    if (index(t, ",") == 0)             # looks like John Wesley Harding
    {
        n = split(t, parts, " ")
        tag = parts[n]
    }
    else
    {
        # careful: "De La Beaujardiere, Jean-M" has spaces in name
        split(t, parts, ",")
        tag = parts[1]
    }
    gsub(/[^-A-Za-z'\055]/, "", tag)    # strip non-(letters,apostrophe)
    tag = tag ":" Entry["year"] ":"
    # We do a second strip here, because some entries for the year
    # can have spaces and other words in them, and because we do
    # not want apostrophes in the tag (bibxxx utilities complain).
    gsub(/[^-A-Za-z0-9:\055]/, "", tag) # strip non-(letters,digits,colon))
    n = split(Entry["title"], parts, " ")
    abbrev = ""
    for (k = 1; (k <= n) && (length(abbrev) < 3); ++k)
    {
        gsub(/[^A-Za-z0-9'\055]/, "", parts[k]) # strip non-(letters,digits,apostrophe)
        if (isletter(substr(parts[k], 1, 1)))
        {
            parts[k] = tolower(parts[k])
            if (!(parts[k] in Ignore))
                abbrev = abbrev toupper(substr(parts[k], 1, 1))
        }
    }
    return (tag abbrev)
}


# Now that the entry has been completely collected, tidy it up for
# subsequent printing: supply placeholder values required by the entry
# type, terminate notes with punctuation, and normalize dashes.
function clean_entry()
{
    if (!("year" in Entry))
        set("year", "19xx")
    if (("annote" in Entry) && !match(Entry["annote"], /[.!?']$/))
        Entry["annote"] = Entry["annote"] "."
    if (("note" in Entry) && !match(Entry["note"], /[.!?']$/))
        Entry["note"] = Entry["note"] "."
    fix_page_range()
    if (entry_name() == "Article")
    {
        if (!("CODEN" in Entry))
            set("OPTCODEN", "????")
        if (!("ISSN" in Entry))
            set("OPTISSN", "????")
        delete Entry["address"]
        delete Entry["publisher"]
    }
    else if ((entry_name() == "Proceedings") || (entry_name() == "Book"))
    {
        if (!("address" in Entry))
            set("address", "????")
        if (!("publisher" in Entry))
            set("publisher", "????")
        if (!("pages" in Entry))
            set("pages", "????")
        if (!("ISBN" in Entry))
            set("OPTISBN", "????")
        if (!("LCCN" in Entry))
            set("OPTLCCN", "????")
    }
    if (entry_name() == "Proceedings")
    {
        if ((!("editor" in Entry)) && (!("key" in Entry)))
            Entry["key"] = "????"
    }
    if ("annote" in Entry)
        Entry["annote"] = fix_dashes(Entry["annote"])
    if ("booktitle" in Entry)
        Entry["booktitle"] = fix_dashes(Entry["booktitle"])
    if ("note" in Entry)
        Entry["note"] = fix_dashes(Entry["note"])
    if ("title" in Entry)
        Entry["title"] = fix_dashes(Entry["title"])
}


# Delete every element of array.
function clear_array(array, key)
{
    for (key in array)
        delete array[key]
}

function convert_A() { join("author", " and ", fix_author(value())) }

function convert_B() { set("booktitle", value()) }

function convert_C() { set("address", value()) }


# Parse a %D date line into separate month/day/year fields.  Handles
# cross-month ranges ("September 30--October 3, 1991"), month ranges,
# day ranges, single days, and 4-digit years; anything unrecognized is
# warned about and stored under "date".
function convert_D( k,n,s,words)
{
    s = value()
    gsub(/ -+ /, "--", s)               # remove blanks around day ranges
    if (match(s, /^[A-Z][a-z]+ [0-9][0-9]?--[A-Z][a-z]+ [0-9][0-9]?, [12][0-9][0-9][0-9]$/))
    {   # looks like "September 30--October 3, 1991"
        split(s, words, ",")
        set("month", trim(words[1]))
        set("year", trim(words[2]))
    }
    else
    {
        n = split(s, words, " ")
        for (k = 1; k <= n; ++k)
        {
            words[k] = trim(words[k])
            gsub(/[, ]+$/, "", words[k]) # trim trailing punctuation
            words[k] = trim(words[k])
            if (match(words[k], /^[A-Z][a-z]+[.]?$/))
                set("month", words[k])
            else if (match(words[k], /^[A-Z][a-z]+[.]?\/[A-Z][a-z]+[.]?$/))
                set("month", words[k])
            else if (match(words[k], /^[A-Z][a-z]+[.]?-+[A-Z][a-z]+[.]?$/))
            {
                gsub(/-+/, "/", words[k]) # change range to slash (conversion
                set("month", words[k])    # handled later in print_month)
            }
            else if (match(words[k], /^[0-9]+-+[0-9]+$/))
            {
                gsub(/-+/, "--", words[k]) # force en-dash for ranges
                set("day", words[k])
            }
            else if (match(words[k], /^[0-9][0-9]?$/))
                set("day", words[k])
            else if (match(words[k], /^[12][0-9][0-9][0-9]$/))
                set("year", words[k])
            else
            {
                warning("Unrecognized date field [" words[k] "]")
                set("date", words[k])
            }
        }
    }
}

function convert_E() { join("editor", " and ", fix_author(value())) }

function convert_G() { set("govtordernumber", value()) }

function convert_I() { set("publisher", value()) }

function convert_J() { set("journal", value()) }

function convert_K() { set("keywords", value()) }

function convert_L() { set("label", value()) }

function convert_M() { set("memolabel", value()) }

function convert_N() { set("number", value()) }


# %O (other commentary) normally maps to "note", but "FTP - host:/path"
# lines from the USENIX bibliography are rewritten in place ($0) into an
# ftp:// URL instead.
function convert_O()
{
    if (match($0, /FTP *- */))          # special support for USENIX bibliography
    {
        sub(/:\//, "/")                 # change host:/path to host/path
        sub(/FTP *- */, "ftp://")       # prefix ftp:// to host/path
        set("URL", value())
    }
    else
        set("note", value())
}


# %P: page number(s), with en-dashes forced into ranges.
function convert_P( s)
{
    s = value()
    gsub(/ *-+ */, "--", s)             # force en-dash for page ranges
    set("pages", s)
}


# %R: unpublished report/thesis.  Choose the BibTeX entry type from the
# wording: "Ph.D ... thesis" -> PhdThesis, "Master ... thesis" ->
# MastersThesis, anything else -> TechReport.
function convert_R()
{
    if (match(value(), "[Pp][Hh][.] *[Dd].*[Tt][Hh][Ee][Ss][Ii][Ss]"))
        set("entrytype", "PhdThesis")
    else if (match(value(), "[Mm][Aa][Ss][Tt][Ee][Rr].*[Tt][Hh][Ee][Ss][Ii][Ss]"))
        set("entrytype", "MastersThesis")
    else
        set("entrytype", "TechReport")
    set("type", value())
}


# %S: series title; its presence implies InProceedings unless the entry
# type was already fixed.
function convert_S()
{
    if (!("entrytype" in Entry))
        set("entrytype", "InProceedings")
    set("series", value())
}

function convert_T() { set("title", embrace(value())) }

function convert_U() { set("URL", value()) }

function convert_V() { set("volume", value()) }

function convert_W() { join("affiliation", "; ", value()) }

function convert_X() { set("annote", value()) }

function convert_Y() { set("ignore-y", value()) }

function convert_Z() { set("ignore-z", value()) }


# Normalize author initials: "A C Hearn" -> "A. C. Hearn" and
# "A.C. Hearn" -> "A. C. Hearn".
function dot_initials(s, k,n,parts,t)
{
    if (is_corporate_name(s))
        return (s)
    n = split(s, parts, " ")
    for (k = 1; k <= n; ++k)            # Expand A C Hearn to A. C. Hearn
    {
        t = (k == 1) ? parts[k] : t " " parts[k]
        if (length(parts[k]) == 1)
            t = t "."
    }
    n = split(t, parts, ".")
    for (k = 1; k <= n; ++k)            # expand A.C. Hearn to A. C. Hearn
    {
        parts[k] = trim(parts[k])
        t = ((k == 1) ? parts[k] : t " " parts[k]) ((k < n) ? "." : "")
    }
    gsub(/[.] -/, ".-", t)              # reduce Chang, C. -C. to Chang, C.-C.
    gsub(/[.] +}/, ".}", t)             # {... Jr. } to {... Jr.}
    return (t)
}


# Wrap words that must keep their case (acronyms, proper nouns from the
# Braceable_Words table, mixed-case words, ...) in protective braces so
# BibTeX styles cannot downcase them.
function embrace(s, k,n,t,words)
{
    n = split(s, words)
    t = ""
    for (k = 1; k <= n; ++k)
    {
        if ( (words[k] ~ /^[A-Z][A-Z0-9]+$/) || \
             (words[k] ~ /^[B-Z]$/) || \
             (words[k] ~ /[A-Z][A-Z]/) || \
             (words[k] ~ /[A-Z]\/[A-Z]/) || \
             (words[k] ~ /[a-z0-9][A-Z]/) || \
             (words[k] ~ /[0-9]-[A-Z]/) || \
             (strip_outer_nonletters(words[k]) in Braceable_Words) )
            t = t "{" words[k] "}"
        else
            t = t words[k]
        if (k < n)
            t = t " "
    }
    gsub(/} {/, " ", t)                 # join adjacent braced words
    gsub(/:}/, "}:", t)                 # move colon,
    gsub(/,}/, "},", t)                 # comma, and
    gsub(/'s}/, "}'s", t)               # possessives outside
    gsub(/'es}/, "}'es", t)             # of the braces
    return (t)
}


# Protect TeX special characters in a field value.  Double quotes are
# braced (via a \377 placeholder) so they survive inside the "..." field
# delimiters; $ is treated heuristically as a date-range separator, a
# currency sign, or a literal dollar.
function enquote(v)
{
    if (substr(v, 1, 1) == "\"")        # enquote leading quote
        v = "{\377}" substr(v, 2)
    while (match(v, /[^\\]\"/))         # brace unbackslashed quotes
    {
        v = substr(v, 1, RSTART) "{\377}" substr(v, RSTART + 2)
    }
    gsub(/\377/, "\"", v)
    gsub(/&/, "\\\\&", v)               # protect ampersands
    gsub(/[ \t][ \t]+/, " ", v)         # eliminate redundant spaces
    gsub(/[%]/, "\\%", v)               # protect percent signs
    gsub(/[#]/, "\\#", v)               # protect sharps
    gsub(/[_]/, "\\_", v)               # protect underscores
    gsub(/[\^]/, "\\^", v)              # protect carets
    if (v ~ /[0-9][$][0-9]/)
        gsub(/[$]/, "--", v)            # June 7$10 -> June 7--10
    else if (v ~ /[$][0-9]+/)
        gsub(/[$]/, "US\\$", v)         # $27 -> US\$27 and $27.95 -> US\$27.95
    else
        gsub(/[$]/, "\\$", v)           # protect dollar signs
    return (v)
}


# Return the BibTeX entry type name, or a loud placeholder if unset.
function entry_name( )
{
    return (("entrytype" in Entry) ? \
            Entry["entrytype"] : "????UNKNOWN-TYPE????")
}


# Normalize one author name: "et al." -> "and others", trailing commas
# removed, "Lastname, Jr"/"Lastname, III" suffixes braced, and initials
# dotted via dot_initials().
function fix_author(s)
{
    gsub(/[,]? et al[.]?/, " and others", s)
    gsub(/[.] */, ". ", s)              # ensure space after initials
    gsub(/[ ,]+$/, "", s)               # remove any erroneous trailing comma(s)
    # Supply protecting braces around "Lastname, Jr" and "Lastname, III"
    if (match(s, / [A-Z][\']?[a-z]+[,]? [JS]r[.]?$/))
        s = substr(s, 1, RSTART) "{" substr(s, RSTART + 1) "}"
    else if (match(s, / [A-Z][\']?[a-z]+[,]? [IVX]+$/))
        s = substr(s, 1, RSTART) "{" substr(s, RSTART + 1) "}"
    return (dot_initials(s))
}


# Convert blank-surrounded dashes, and letter--letter en-dashes, to
# em-dashes (---); numeric ranges like 7--10 are left alone.
function fix_dashes(s)
{
    gsub(" - ", " --- ", s)
    gsub(" -- ", " --- ", s)
    while (match(s, "[a-zA-Z]--[^?0-9-]"))
        s = substr(s, 1, RSTART) " --- " substr(s, RSTART + RLENGTH - 1)
    return (s)
}


# Check for missing page range data: OCLC databases usually have
# only the initial page number, sigh...  For entry types that need a
# range, turn "123" into "123--??" and a missing field into "??--??".
function fix_page_range( e)
{
    e = entry_name()
    if ( (e == "Article") || (e == "InBook") || (e == "InCollection") || \
         (e == "InProceedings") )
    {
        if ("pages" in Entry)
        {   # replace single number by uncertain range
            if (Entry["pages"] ~ /^[0-9]+$/)
                Entry["pages"] = Entry["pages"] "--??"
        }
        else
            Entry["pages"] = "??--??"
    }
}


# Replace alternating double quotes with TeX `` and '' pairs, warning
# if the quotes are unbalanced.
function fix_quotes(s, count,k)
{
    count = 0
    k = index(s, "\"")
    while (k > 0)
    {
        count++
        if ((count % 2) == 1)           # open quote
            s = substr(s, 1, k - 1) "``" substr(s, k + 1)
        else                            # close quote
            s = substr(s, 1, k - 1) "''" substr(s, k + 1)
        k = index(s, "\"")
    }
    if ((count % 2) == 1)
        warning("Unbalanced double quotes in " s)
    return (s)
}


# Heuristically choose a BibTeX entry type when none was set explicitly,
# based on booktitle keywords, journal/ISSN/ISBN presence, and year.
function guess_type( type,year)
{
    if (!("entrytype" in Entry))
    {
        year = ("year" in Entry) ? Entry["year"] : "19xx"
        if (match_booktitle("proceedings") || \
            match_booktitle("conference") || \
            match_booktitle("symposium") || \
            match_booktitle("workshop"))
            type = "InProceedings"
        else if ((!("journal" in Entry)) && \
                 (match_booktitle("newsletter") || \
                  match_booktitle("computing systems") || \
                  match_booktitle(";login")))
        {   # [last two are special cases for USENIX bibliography]
            type = "Article"
            Entry["journal"] = Entry["booktitle"]
            delete Entry["booktitle"]
        }
        else if ("journal" in Entry)
            type = "Article"
        else if ("ISSN" in Entry)
            type = "Periodical"
        else if ("ISBN" in Entry)
            type = "Book"
        else if (year >= "1972")
            type = "Book"
        else
            type = "Article"
        set("entrytype", type)
    }
}


# One-time setup: load the word tables, capture the current date (for
# the bibdate field), and start with an empty entry.
function initialize()
{
    initialize_Braceable_Words_table()
    initialize_ignore_list()
    initialize_Month_Abbreviations()
    "date" | getline Current_Date_and_Time
    close("date")
    new_entry()
}

function initialize_Braceable_Words_table()
{
    # These words commonly occur in conference titles
    Braceable_Words["January"] = 1
    Braceable_Words["February"] = 1
    Braceable_Words["March"] = 1
    Braceable_Words["April"] = 1
    Braceable_Words["May"] = 1
    Braceable_Words["June"] = 1
    Braceable_Words["July"] = 1
    Braceable_Words["August"] = 1
    Braceable_Words["September"] = 1
    Braceable_Words["October"] = 1
    Braceable_Words["November"] = 1
    Braceable_Words["December"] = 1

    # These proper nouns and adjectives commonly occur in
    # computer-related bibliographies
    Braceable_Words["Ada"] = 1
    Braceable_Words["American"] = 1
    Braceable_Words["Athena"] = 1
    Braceable_Words["Australian"] = 1
    Braceable_Words["Berkeley"] = 1
    Braceable_Words["Bourne"] = 1
    Braceable_Words["British"] = 1
    Braceable_Words["Canadian"] = 1
    Braceable_Words["C++"] = 1
    Braceable_Words["Chorus"] = 1
    Braceable_Words["Cray"] = 1
    Braceable_Words["Emacs"] = 1
    Braceable_Words["English"] = 1
    Braceable_Words["Ethernet"] = 1
    Braceable_Words["European"] = 1
    Braceable_Words["Fortran"] = 1
    Braceable_Words["French"] = 1
    Braceable_Words["German"] = 1
    Braceable_Words["Intel"] = 1
    Braceable_Words["Internet"] = 1
    Braceable_Words["Japanese"] = 1
    Braceable_Words["Korn"] = 1
    Braceable_Words["Kerberos"] = 1
    Braceable_Words["Lisp"] = 1
    Braceable_Words["Mach"] = 1
    Braceable_Words["Macintosh"] = 1
    Braceable_Words["Microsoft"] = 1
    Braceable_Words["Mosaic"] = 1
    Braceable_Words["Motif"] = 1
    Braceable_Words["O'Reilly"] = 1
    Braceable_Words["Perl"] = 1
    Braceable_Words["Prolog"] = 1
    Braceable_Words["Smalltalk"] = 1
    Braceable_Words["Sun"] = 1
    Braceable_Words["Tcl"] = 1
    Braceable_Words["Tk"] = 1
    Braceable_Words["Transputer"] = 1
    Braceable_Words["Ultrix"] = 1
    Braceable_Words["Unix"] = 1
    Braceable_Words["Unix-based"] = 1
    Braceable_Words["Usenet"] = 1
    Braceable_Words["Usenix"] = 1
    Braceable_Words["Vax"] = 1
    Braceable_Words["Windows"] = 1
    Braceable_Words["Xlib"] = 1
    Braceable_Words["Xmt"] = 1
}

function initialize_ignore_list()
{
    # List of words to ignore in forming citation tags.  The initial
    # list was extracted from the bibindex badwords list, and covers
    # a few European languages as well as English.
    Ignore["a"] = 1
    Ignore["ab"] = 1
    Ignore["aber"] = 1
    Ignore["als"] = 1
    Ignore["an"] = 1
    Ignore["and"] = 1
    Ignore["are"] = 1
    Ignore["as"] = 1
    Ignore["auf"] = 1
    Ignore["aus"] = 1
    Ignore["az"] = 1
    Ignore["bei"] = 1
    Ignore["bir"] = 1
    Ignore["but"] = 1
    Ignore["da"] = 1
    Ignore["das"] = 1
    Ignore["dat"] = 1
    Ignore["de"] = 1
    Ignore["dei"] = 1
    Ignore["dem"] = 1
    Ignore["den"] = 1
    Ignore["der"] = 1
    Ignore["des"] = 1
    Ignore["det"] = 1
    Ignore["di"] = 1
    Ignore["die"] = 1
    Ignore["dos"] = 1
    Ignore["e"] = 1
    Ignore["een"] = 1
    Ignore["eene"] = 1
    Ignore["egy"] = 1
    Ignore["ei"] = 1
    Ignore["ein"] = 1
    Ignore["eine"] = 1
    Ignore["einen"] = 1
    Ignore["einer"] = 1
    Ignore["eines"] = 1
    Ignore["eit"] = 1
    Ignore["el"] = 1
    Ignore["en"] = 1
    Ignore["er"] = 1
    Ignore["es"] = 1
    Ignore["et"] = 1
    Ignore["ett"] = 1
    Ignore["eyn"] = 1
    Ignore["eyne"] = 1
    Ignore["for"] = 1
    Ignore["from"] = 1
    Ignore["fuer"] = 1
    Ignore["fur"] = 1
    Ignore["gl"] = 1
    Ignore["gli"] = 1
    Ignore["ha"] = 1
    Ignore["haben"] = 1
    Ignore["had"] = 1
    Ignore["hai"] = 1
    Ignore["has"] = 1
    Ignore["hat"] = 1
    Ignore["have"] = 1
    Ignore["he"] = 1
    Ignore["heis"] = 1
    Ignore["hen"] = 1
    Ignore["hena"] = 1
    Ignore["henas"] = 1
    Ignore["het"] = 1
    Ignore["hin"] = 1
    Ignore["hinar"] = 1
    Ignore["hinir"] = 1
    Ignore["hinn"] = 1
    Ignore["hith"] = 1
    Ignore["ho"] = 1
    Ignore["hoi"] = 1
    Ignore["i"] = 1
    Ignore["il"] = 1
    Ignore["in"] = 1
    Ignore["is"] = 1
    Ignore["ist"] = 1
    Ignore["ka"] = 1
    Ignore["ke"] = 1
    Ignore["l"] = 1
    Ignore["la"] = 1
    Ignore["las"] = 1
    Ignore["le"] = 1
    Ignore["les"] = 1
    Ignore["lo"] = 1
    Ignore["los"] = 1
    Ignore["mia"] = 1
    Ignore["mit"] = 1
    Ignore["n"] = 1
    Ignore["na"] = 1
    Ignore["nji"] = 1
    Ignore["not"] = 1
    Ignore["o"] = 1
    Ignore["oder"] = 1
    Ignore["of"] = 1
    Ignore["on"] = 1
    Ignore["or"] = 1
    Ignore["os"] = 1
    Ignore["others"] = 1
    Ignore["s"] = 1
    Ignore["sie"] = 1
    Ignore["sind"] = 1
    Ignore["so"] = 1
    Ignore["t"] = 1
    Ignore["ta"] = 1
    Ignore["the"] = 1
    Ignore["to"] = 1
    Ignore["um"] = 1
    Ignore["uma"] = 1
    Ignore["un"] = 1
    Ignore["una"] = 1
    Ignore["und"] = 1
    Ignore["une"] = 1
    Ignore["uno"] = 1
    Ignore["unter"] = 1
    Ignore["von"] = 1
    Ignore["with"] = 1
    Ignore["y"] = 1
    Ignore["yr"] = 1

    # Additional words added later
    Ignore["also"] = 1
    Ignore["any"] = 1
    Ignore["away"] = 1
    Ignore["by"] = 1
    Ignore["cum"] = 1
    Ignore["dans"] = 1
    Ignore["down"] = 1
    Ignore["into"] = 1
    Ignore["its"] = 1
    Ignore["off"] = 1
    Ignore["onto"] = 1
    Ignore["out"] = 1
    Ignore["over"] = 1
    Ignore["sur"] = 1
    Ignore["that"] = 1
    Ignore["these"] = 1
    Ignore["this"] = 1
    Ignore["those"] = 1
    Ignore["unto"] = 1
    Ignore["up"] = 1
    Ignore["via"] = 1
    Ignore["without"] = 1
    Ignore["zu"] = 1
    Ignore["zum"] = 1
    Ignore["zur"] = 1
}

# Map lowercased month names (full, 3-letter, and "sept") to the
# standard BibTeX month macro names.
function initialize_Month_Abbreviations()
{
    Month_Abbrev["january"] = "jan"
    Month_Abbrev["february"] = "feb"
    Month_Abbrev["march"] = "mar"
    Month_Abbrev["april"] = "apr"
    Month_Abbrev["may"] = "may"
    Month_Abbrev["june"] = "jun"
    Month_Abbrev["july"] = "jul"
    Month_Abbrev["august"] = "aug"
    Month_Abbrev["september"] = "sep"
    Month_Abbrev["october"] = "oct"
    Month_Abbrev["november"] = "nov"
    Month_Abbrev["december"] = "dec"
    Month_Abbrev["jan"] = "jan"
    Month_Abbrev["feb"] = "feb"
    Month_Abbrev["mar"] = "mar"
    Month_Abbrev["apr"] = "apr"
    Month_Abbrev["may"] = "may"
    Month_Abbrev["jun"] = "jun"
    Month_Abbrev["jul"] = "jul"
    Month_Abbrev["aug"] = "aug"
    Month_Abbrev["sep"] = "sep"
    Month_Abbrev["oct"] = "oct"
    Month_Abbrev["nov"] = "nov"
    Month_Abbrev["dec"] = "dec"
    Month_Abbrev["sept"] = "sep"
}


# True if s looks like a corporate (rather than personal) name.
function is_corporate_name(s)
{
    return (match(s,/Ltd/) || match(s,/Co[.]/) || match(s,/Company/) || \
            match(s,/Corp/) || match(s,/Inc[.]?$/) || match(s,/Pty[.]?/) || \
            match(s,/Cie[.]/) || match(s,/AG/))
}


# True if c is an ASCII letter (either case).
function isletter(c)
{
    return (index("ABCDEFGHIJKLMNOPQRSTUVWXYZ", toupper(c)) > 0)
}


# True if c is an ASCII uppercase letter.
function isupper(c)
{
    return (index("ABCDEFGHIJKLMNOPQRSTUVWXYZ", c) > 0)
}


# Append s to Entry[key], inserting separator if the key already exists.
function join(key,separator,s)
{
    if (key in Entry)
        Entry[key] = Entry[key] separator s
    else
        Entry[key] = s
}


# True if the booktitle field exists and matches regexp s (case-folded).
function match_booktitle(s)
{
    return ("booktitle" in Entry) && match(tolower(Entry["booktitle"]), s)
}


# Reset all per-entry state.
function new_entry()
{
    clear_array(Entry)
    Last_Key = ""
}


# Emit the accumulated Entry[] as a BibTeX entry, then reset.  An
# InProceedings entry is first emitted as a derived @Proceedings entry
# (recursively), and the InProceedings is cross-referenced to it.
function print_entry( crossref,key,k)
{
    k = 0
    for (key in Entry)                  # count the number of saved key/value pairs
        k++
    guess_type()
    if (("entrytype" in Entry) && (k > 0))
    {
        clean_entry()
        if (entry_name() == "InProceedings")
        {
            # Output a Proceedings entry for each InProceedings.  Any
            # duplicates that are generated can later be flushed by
            # "bibsort -u".
            save_entry()
            delete Entry["affiliation"]
            delete Entry["annote"]
            delete Entry["author"]
            delete Entry["note"]
            delete Entry["label"]
            delete Entry["pages"]
            delete Entry["URL"]
            delete Entry["crossref"]
            delete Entry["keywords"]
            delete Entry["unknown"]
            Entry["title"] = Entry["booktitle"]
            set("pages", "????")
            Entry["entrytype"] = "Proceedings"
            crossref = citation_tag()
            print_entry()               # print @Proceedings{...} recursively
            restore_entry()
            set("crossref", crossref)
            delete Entry["address"]
            delete Entry["booktitle"]
            delete Entry["day"]
            delete Entry["editor"]
            delete Entry["month"]
            delete Entry["publisher"]
            delete Entry["CODEN"]
            delete Entry["ISBN"]
            delete Entry["ISSN"]
            delete Entry["series"]
            delete Entry["memolabel"]
            delete Entry["type"]
        }
        print("@" entry_name() "{" citation_tag() ",")
        # temporary alteration for USENIX bibliography
        # Entry["acknowledgement"] = "ack-nhfb"
        Entry["bibsource"] = "ftp://ftp.uu.net/library/bibliography"
        Entry["bibdate"] = Current_Date_and_Time
        # After trying strict alphabetic order for the keywords, I
        # decided that it is more useful to have most of them in a
        # specific order, that corresponding to the usual order in an
        # article citation.  print_pair() will delete each entry that
        # it prints, and ignore any that don't exist, so that key/value
        # pairs are not duplicated in the subsequent output after
        # sort_keys().  The key order here is chosen to match biborder(1).
        print_pair("author")
        print_pair("editor")
        print_pair("key")
        print_pair("booktitle")
        print_pair("title")
        print_pair("crossref")
        print_pair("chapter")
        print_pair("journal")
        print_pair("volume")
        print_pair("type")
        print_pair("number")
        print_pair("howpublished")
        print_pair("institution")
        print_pair("organization")
        print_pair("publisher")
        print_pair("school")
        print_pair("address")
        print_pair("edition")
        print_pair("pages_whole")       # NB: Must precede "pages" because it
        print_pair("pages")             # checks for the existence of "pages"
        print_pair("day")
        print_pair("month")
        print_pair("year")
        print_pair("ISBN")
        print_pair("ISSN")
        print_pair("LCCN")
        print_pair("note")
        print_pair("price")
        print_pair("series")
        # Output any remaining key/value pairs sorted by key name
        sort_keys()
        for (k = 1; Sorted_Keys[k]; ++k)
            print_key_value(Sorted_Keys[k], Entry[Sorted_Keys[k]])
        print "}\n"
    }
    new_entry()
}


# Print "  key = abbrev," with the key name padded toward a fixed width.
# NOTE(review): the padding source string appears as a single space in
# the file as received; the original likely used a longer run of blanks
# so that 12-length(key) columns of padding are available -- confirm
# against an intact copy of ref2bib.awk.
function print_key_abbrev(key,abbrev)
{
    printf(" %s =%s %s,\n", key, \
           substr(" ", 1, 12 - length(key)), abbrev)
}


# Print "  key = \"string\"," after TeXifying 8-bit accented characters
# and protecting TeX specials.
function print_key_string(key,string)
{
    # TeXify 8-bit accented characters
    gsub(/\252/, "\\?", string)  # what is this character?? Perhaps trademark
    gsub(/\260/, "\\?", string)  # what is this character?? Displays as Angstrom accent in willow, but as separate character
    gsub(/\341/, "\\`", string)
    gsub(/\342/, "\\'", string)
    gsub(/\343/, "\\^", string)
    gsub(/\346/, "\\?", string)  # what is this character??
    gsub(/\351/, "\\v ", string)
    gsub(/\365x/, "${}^x$", string)
    gsub(/\365X/, "${}^X$", string)
    gsub(/\350a/, "{\\\"a}", string)
    gsub(/\350e/, "{\\\"e}", string)
    gsub(/\350o/, "{\\\"o}", string)
    gsub(/\350u/, "{\\\"u}", string)
    gsub(/[.][.][.]/, "\\ldots{}", string)  # convert ellipses ... to \ldots{}
    gsub(/[\;:, ]+$/, "", string)           # remove selected trailing punctuation
    printf(" %s = %s\"%s\",\n", \
           key, \
           substr(" ", 1, 12 - length(key)), \
           trim(enquote(fix_quotes(string))))
}


# Dispatch one key/value pair to the appropriate printer.
function print_key_value(key,value)
{
    if (key == "entrytype")             # already output as @type{tag,
        return                          # }
    else if (key == "acknowledgement")
        print_key_abbrev(key, value)
    else if (key == "month")
        print_month(value)
    else if (key == "pages_whole")
        print_key_string(("pages" in Entry) ? "bookpages" : "pages", value)
    else
        print_key_string(key, value)
}


# Print a month field as BibTeX month abbreviations, joining multi-month
# values (stored as slash-separated) with # "\slash " # concatenation.
function print_month(month, k,mon,n,s,words)
{
    n = split(month, words, "/")
    s = ""
    for (k = 1; k <= n; ++k)
    {
        mon = tolower(words[k])
        gsub(/[^A-Za-z\055]/, "", mon)  # strip punctuation
        if (mon in Month_Abbrev)
            s = (s == "") ? Month_Abbrev[mon] : \
                (s " # \"\\slash \" # " Month_Abbrev[mon])
        else
            s = (s == "") ? ("\"" words[k] "\"") : \
                (s " # \"\\slash \" # " ("\"" words[k] "\""))
    }
    print_key_abbrev("month", s)
}


# Print and then delete Entry[key], if present.
function print_pair(key)
{
    if (key in Entry)
    {
        print_key_value(key, Entry[key])
        delete Entry[key]
    }
}


# Rewrite a "Keyword: value" USENIX line in place as a "%%value"
# pseudo-refer line, so the convert_X() routines can reuse value().
function remap( n)
{
    n = index($0, ":")
    if (n > 0)
        $0 = "%%" substr($0, n + 1)
}


# Copy Saved_Entry back into Entry (inverse of save_entry()).
function restore_entry( key)
{
    for (key in Entry)                  # delete old entry
        delete Entry[key]
    for (key in Saved_Entry)            # copy Saved_Entry to entry
        Entry[key] = Saved_Entry[key]
}


# Snapshot Entry into Saved_Entry.
function save_entry( key)
{
    for (key in Saved_Entry)            # delete old Saved_Entry
        delete Saved_Entry[key]
    for (key in Entry)                  # copy entry to Saved_Entry
        Saved_Entry[key] = Entry[key]
}


# Store Entry[key] = s and remember key in Last_Key for continuation
# lines.  A duplicate key gets a warning and its old value is replaced.
# (The previous version of this function also concatenated old and new
# values on a duplicate, but that result was immediately overwritten by
# the unconditional assignment below; the dead code, and the Last_Key
# save/restore that went with it, have been removed without changing
# behavior.)
function set(key,s)
{
    if (key in Entry)
        warning("Duplicate " key ": old text [" Entry[key] "]")
    Entry[key] = s
    Last_Key = key
}


# Fill Sorted_Keys[1..n] with the keys of Entry, case-insensitively
# sorted by a simple exchange sort (entry counts are tiny).
function sort_keys( k,key,m,n)
{
    clear_array(Sorted_Keys)
    n = 0
    for (key in Entry)
    {
        n++
        Sorted_Keys[n] = key
    }
    for (k = 1; k < n; ++k)
    {
        for (m = k + 1; m <= n; ++m)
        {
            if (tolower(Sorted_Keys[k]) > tolower(Sorted_Keys[m]))
            {
                key = Sorted_Keys[m]
                Sorted_Keys[m] = Sorted_Keys[k]
                Sorted_Keys[k] = key
            }
        }
    }
}


# Remove leading and trailing non-letter characters.
function strip_outer_nonletters(s)
{
    sub(/^[^A-Za-z]+/, "", s)
    sub(/[^A-Za-z]+$/, "", s)
    return (s)
}


# Remove leading and trailing whitespace.
function trim(s)
{
    gsub(/^[ \t]+/, "", s)
    gsub(/[ \t]+$/, "", s)
    return (s)
}


# Return the value part of the current "%X value" line ($0 from column 3
# on), with tabs reduced to spaces and whitespace normalized.
function value( s)
{
    s = substr($0, 3)
    gsub(/[\t]/, " ", s)                # reduce tabs to spaces
    sub(/^ +/, "", s)                   # trim leading space
    sub(/ +$/, "", s)                   # trim trailing space
    gsub(/ +/, " ", s)                  # reduce multiple spaces to single ones
    return (s)
}


# Report a diagnostic, prefixed with the input file name and line number.
function warning(message)
{
    print FILENAME ":" FNR ":%%" message >"/dev/stderr"
}