%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%% author = "Nelson H. F. Beebe",
%%% version = "1.13",
%%% date = "16 June 2008",
%%% time = "17:20:34 MDT",
%%% filename = "talip.bib",
%%% address = "University of Utah
%%% Department of Mathematics, 110 LCB
%%% 155 S 1400 E RM 233
%%% Salt Lake City, UT 84112-0090
%%% USA",
%%% telephone = "+1 801 581 5254",
%%% FAX = "+1 801 581 4148",
%%% URL = "http://www.math.utah.edu/~beebe",
%%% checksum = "34328 2669 11093 100650",
%%% email = "beebe at math.utah.edu, beebe at acm.org,
%%% beebe at computer.org (Internet)",
%%% codetable = "ISO/ASCII",
%%% keywords = "Asian language information processing,
%%% bibliography, BibTeX, TALIP",
%%% license = "public domain",
%%% supported = "yes",
%%% docstring = "This is a COMPLETE BibTeX bibliography for
%%% ACM Transactions on Asian language
%%% information processing (TALIP) (CODEN none,
%%% ISSN 1530-0226), which began publishing in
%%% March 2002.
%%%
%%% The journal has a World Wide Web site at
%%%
%%% http://www.acm.org/pubs/talip/
%%% http://portal.acm.org/browse_dl.cfm?&idx=J820
%%%
%%% At version 1.13, the year coverage looked
%%% like this:
%%%
%%% 2002 ( 15) 2005 ( 17) 2008 ( 7)
%%% 2003 ( 22) 2006 ( 28)
%%% 2004 ( 17) 2007 ( 14)
%%%
%%% Article: 120
%%%
%%% Total entries: 120
%%%
%%% This bibliography has been constructed
%%% primarily from the publisher Web site.
%%%
%%% Numerous errors in the sources noted above
%%% have been corrected. Spelling has been
%%% verified with the UNIX spell and GNU ispell
%%% programs using the exception dictionary
%%% stored in the companion file with extension
%%% .sok.
%%%
%%% BibTeX citation tags are uniformly chosen as
%%% name:year:abbrev, where name is the family
%%% name of the first author or editor, year is a
%%% 4-digit number, and abbrev is a 3-letter
%%% condensation of important title words.
%%% Citation labels were automatically generated
%%% by software developed for the BibNet Project.
%%%
%%% In this bibliography, entries are sorted in
%%% publication order, with the help of
%%% ``bibsort -byvolume''. The bibsort utility
%%% is available from ftp.math.utah.edu in
%%% /pub/tex/bib.
%%%
%%% The checksum field above contains a CRC-16
%%% checksum as the first value, followed by the
%%% equivalent of the standard UNIX wc (word
%%% count) utility output of lines, words, and
%%% characters. This is produced by Robert
%%% Solovay's checksum utility.",
%%% }
%%% ====================================================================
%%% Preamble: an (empty) hyphenation-exception list emitted ahead of the
%%% bibliography.
@Preamble{ "\hyphenation{ }" }
%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}
%%% ====================================================================
%%% Journal abbreviations:
@String{j-TALIP = "ACM Transactions on Asian Language Information Processing"}
%%% ====================================================================
%%% Bibliography entries:
@Article{Wong:2002:P,
  author          = "Kam-Fai Wong and Jun'ichi Tsujii",
  title           = "Prologue",
  journal         = j-TALIP,
  year            = "2002",
  month           = mar,
  volume          = "1",
  number          = "1",
  pages           = "1--2",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Tue Nov 5 23:44:34 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Gao:2002:TUA,
  author          = "Jianfeng Gao and Joshua Goodman and Mingjing Li and
                     Kai-Fu Lee",
  title           = "Toward a unified approach to statistical language
                     modeling for {Chinese}",
  journal         = j-TALIP,
  year            = "2002",
  month           = mar,
  volume          = "1",
  number          = "1",
  pages           = "3--33",
  ISSN            = "1530-0226",
  CODEN           = "????",
  DOI             = "10.1145/509900.509903",
  bibdate         = "Tue Nov 5 23:44:34 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Lai:2002:MTE,
  author          = "Yu-Sheng Lai and Chung-Hsien Wu",
  title           = "Meaningful term extraction and discriminative term
                     selection in text categorization via unknown-word
                     methodology",
  journal         = j-TALIP,
  year            = "2002",
  month           = mar,
  volume          = "1",
  number          = "1",
  pages           = "34--64",
  ISSN            = "1530-0226",
  CODEN           = "????",
  DOI             = "10.1145/509900.509904",
  bibdate         = "Tue Nov 5 23:44:34 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Kim:2002:MBG,
  author          = "Byeongchang Kim and Gary Geunbae Lee and Jong-Hyeok
                     Lee",
  title           = "Morpheme-based grapheme to phoneme conversion using
                     phonetic patterns and morphophonemic connectivity
                     information",
  journal         = j-TALIP,
  year            = "2002",
  month           = mar,
  volume          = "1",
  number          = "1",
  pages           = "65--82",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Tue Nov 5 23:44:34 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Lee:2002:UTI,
  author          = "Tan Lee and Wai Lau and Y. W. Wong and P. C. Ching",
  title           = "Using tone information in {Cantonese} continuous
                     speech recognition",
  journal         = j-TALIP,
  year            = "2002",
  month           = mar,
  volume          = "1",
  number          = "1",
  pages           = "83--102",
  ISSN            = "1530-0226",
  CODEN           = "????",
  DOI             = "10.1145/509900.509906",
  bibdate         = "Tue Nov 5 23:44:34 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Chen:2002:BCE,
  author          = "Hsin-Hsi Chen and Chi-Ching Lin and Wen-Cheng Lin",
  title           = "Building a {Chinese-English} wordnet for translingual
                     applications",
  journal         = j-TALIP,
  year            = "2002",
  month           = jun,
  volume          = "1",
  number          = "2",
  pages           = "103--122",
  ISSN            = "1530-0226",
  CODEN           = "????",
  DOI             = "10.1145/568954.568955",
  bibdate         = "Tue Nov 5 23:44:36 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Meng:2002:GPM,
  author          = "Helen Meng and Po-Chui Luk and Kui Xu and Fuliang
                     Weng",
  title           = "{GLR} parsing with multiple grammars for natural
                     language queries",
  journal         = j-TALIP,
  year            = "2002",
  month           = jun,
  volume          = "1",
  number          = "2",
  pages           = "123--144",
  ISSN            = "1530-0226",
  CODEN           = "????",
  DOI             = "10.1145/568954.568956",
  bibdate         = "Tue Nov 5 23:44:36 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Murata:2002:CTM,
  author          = "Masaki Murata and Qing Ma and Hitoshi Isahara",
  title           = "Comparison of three machine-learning methods for
                     {Thai} part-of-speech tagging",
  journal         = j-TALIP,
  year            = "2002",
  month           = jun,
  volume          = "1",
  number          = "2",
  pages           = "145--158",
  ISSN            = "1530-0226",
  CODEN           = "????",
  DOI             = "10.1145/568954.568957",
  bibdate         = "Tue Nov 5 23:44:36 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Lu:2002:TWQ,
  author          = "Wen-Hsiang Lu and Lee-Feng Chien and Hsi-Jian Lee",
  title           = "Translation of web queries using anchor text mining",
  journal         = j-TALIP,
  year            = "2002",
  month           = jun,
  volume          = "1",
  number          = "2",
  pages           = "159--172",
  ISSN            = "1530-0226",
  CODEN           = "????",
  DOI             = "10.1145/568954.568958",
  bibdate         = "Tue Nov 5 23:44:36 MST 2002",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Li:2002:WBA,
  author          = "Wenjie Li and Kam-Fai Wong",
  title           = "A word-based approach for modeling and discovering
                     temporal relations embedded in {Chinese} sentences",
  journal         = j-TALIP,
  year            = "2002",
  month           = sep,
  volume          = "1",
  number          = "3",
  pages           = "173--206",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Aug 7 08:49:00 MDT 2003",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Lee:2002:ACB,
  author          = "Jin-Seok Lee and Byeongchang Kim and Gary Geunbae
                     Lee",
  title           = "Automatic corpus-based tone and break-index prediction
                     using {K-ToBI} representation",
  journal         = j-TALIP,
  year            = "2002",
  month           = sep,
  volume          = "1",
  number          = "3",
  pages           = "207--224",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Aug 7 08:49:00 MDT 2003",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Luk:2002:CCD,
  author          = "Robert W. P. Luk and K. L. Kwok",
  title           = "A comparison of {Chinese} document indexing strategies
                     and retrieval models",
  journal         = j-TALIP,
  year            = "2002",
  month           = sep,
  volume          = "1",
  number          = "3",
  pages           = "225--268",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Aug 7 08:49:00 MDT 2003",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Suzuki:2002:LCS,
  author          = "Izumi Suzuki and Yoshiki Mikami and Ario Ohsato and
                     Yoshihide Chubachi",
  title           = "A language and character set determination method
                     based on {N}-gram statistics",
  journal         = j-TALIP,
  year            = "2002",
  month           = sep,
  volume          = "1",
  number          = "3",
  pages           = "269--278",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Aug 7 08:49:00 MDT 2003",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Jin:2002:CDC,
  author          = "Honglan Jin and Kam-Fai Wong",
  title           = "A {Chinese} dictionary construction algorithm for
                     information retrieval",
  journal         = j-TALIP,
  year            = "2002",
  month           = dec,
  volume          = "1",
  number          = "4",
  pages           = "281--296",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Aug 7 08:49:01 MDT 2003",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Li:2002:CCB,
  author          = "Yuanxiang Li and Xiaoqing Ding and Chew Lim Tan",
  title           = "Combining character-based bigrams with word-based
                     bigrams in contextual postprocessing for {Chinese}
                     script recognition",
  journal         = j-TALIP,
  year            = "2002",
  month           = dec,
  volume          = "1",
  number          = "4",
  pages           = "297--309",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Aug 7 08:49:01 MDT 2003",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Lo:2003:CLS,
  author          = "Wai-Kit Lo and Helen Meng and P. C. Ching",
  title           = "Cross-language spoken document retrieval using
                     {HMM}-based retrieval model with multi-scale fusion",
  journal         = j-TALIP,
  year            = "2003",
  month           = mar,
  volume          = "2",
  number          = "1",
  pages           = "1--26",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sun Jan 11 10:17:38 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Shi:2003:OHC,
  author          = "Daming Shi and Robert I. Damper and Steve R. Gunn",
  title           = "Offline handwritten {Chinese} character recognition by
                     radical decomposition",
  journal         = j-TALIP,
  year            = "2003",
  month           = mar,
  volume          = "2",
  number          = "1",
  pages           = "27--48",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sun Jan 11 10:17:38 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Lee:2003:TAS,
  author          = "Yue-Shi Lee",
  title           = "Task adaptation in stochastic language model for
                     {Chinese} homophone disambiguation",
  journal         = j-TALIP,
  year            = "2003",
  month           = mar,
  volume          = "2",
  number          = "1",
  pages           = "49--62",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sun Jan 11 10:17:38 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Shieh:2003:EAT,
  author          = "Jiann-Cherng Shieh",
  title           = "An efficient accessing technique for {Taiwanese}
                     phonetic transcriptions",
  journal         = j-TALIP,
  year            = "2003",
  month           = mar,
  volume          = "2",
  number          = "1",
  pages           = "63--77",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sun Jan 11 10:17:38 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Oard:2003:SLE,
  author          = "Douglas W. Oard",
  title           = "The surprise language exercises",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "79--84",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Allan:2003:MTD,
  author          = "James Allan and Victor Lavrenko and Margaret E.
                     Connell",
  title           = "A month to topic detection and tracking in {Hindi}",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "85--100",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Strassel:2003:LRC,
  author          = "Stephanie Strassel and Mike Maxwell and Christopher
                     Cieri",
  title           = "Linguistic resource creation for research and
                     technology development: {A} recent experiment",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "101--117",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Dorr:2003:RPD,
  author          = "Bonnie J. Dorr and Necip Fazil Ayan and Nizar Habash
                     and Nitin Madnani and Rebecca Hwa",
  title           = "Rapid porting of {DUSTer} to {Hindi}",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "118--123",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Huang:2003:ENE,
  author          = "Fei Huang and Stephan Vogel and Alex Waibel",
  title           = "Extracting named entity translingual equivalence with
                     limited resources",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "124--129",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Larkey:2003:HCT,
  author          = "Leah S. Larkey and Margaret E. Connell and Nasreen
                     Abduljaleel",
  title           = "{Hindi CLIR} in thirty days",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "130--142",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
%%% "Font Llitj{\'o}s" is a two-word family name; it is entered in
%%% "Last, First" form so that BibTeX does not treat "Font" as a given name.
@Article{Lavie:2003:EHE,
  author          = "Alon Lavie and Stephan Vogel and Lori Levin and Erik
                     Peterson and Katharina Probst and
                     Font Llitj{\'o}s, Ariadna and Rachel Reynolds and
                     Jaime Carbonell and Richard Cohen",
  title           = "Experiments with a {Hindi-to-English} transfer-based
                     {MT} system under a miserly data scenario",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "143--163",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Xu:2003:CLR,
  author          = "Jinxi Xu and Ralph Weischedel",
  title           = "Cross-lingual retrieval for {Hindi}",
  journal         = j-TALIP,
  year            = "2003",
  month           = jun,
  volume          = "2",
  number          = "2",
  pages           = "164--168",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:35 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{May:2003:SWC,
  author          = "Jonathan May and Ada Brunstein and Prem Natarajan and
                     Ralph Weischedel",
  title           = "Surprise! {What}'s in a {Cebuano} or {Hindi} name?",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "169--180",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Sekine:2003:HEC,
  author          = "Satoshi Sekine and Ralph Grishman",
  title           = "{Hindi-English} cross-lingual question-answering
                     system",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "181--192",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Ma:2003:AHO,
  author          = "Huanfeng Ma and David Doermann",
  title           = "Adaptive {Hindi OCR} using generalized {Hausdorff}
                     image comparison",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "193--218",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{He:2003:MMI,
  author          = "Daqing He and Douglas W. Oard and Jianqiang Wang and
                     Jun Luo and Dina Demner-Fushman and Kareem Darwish and
                     Philip Resnik and Sanjeev Khudanpur and Michael Nossal
                     and Michael Subotin and Anton Leuski",
  title           = "Making {MIRACLEs}: {Interactive} translingual search
                     for {Cebuano} and {Hindi}",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "219--244",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Leuski:2003:CLC,
  author          = "Anton Leuski and Chin-Yew Lin and Liang Zhou and
                     Ulrich Germann and Franz Josef Och and Eduard Hovy",
  title           = "Cross-lingual {C*ST*RD}: {English} access to {Hindi}
                     information",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "245--269",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Dorr:2003:CLH,
  author          = "Bonnie Dorr and David Zajic and Richard Schwartz",
  title           = "Cross-language headline generation for {Hindi}",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "270--289",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Li:2003:RDH,
  author          = "Wei Li and Andrew McCallum",
  title           = "Rapid development of {Hindi} named entity recognition
                     using conditional random fields and feature induction",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "290--294",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Maynard:2003:RCI,
  author          = "Diana Maynard and Valentin Tablan and Kalina Bontcheva
                     and Hamish Cunningham",
  title           = "Rapid customization of an information extraction
                     system for a surprise language",
  journal         = j-TALIP,
  year            = "2003",
  month           = sep,
  volume          = "2",
  number          = "3",
  pages           = "295--300",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Kang:2003:IPP,
  author          = "Mi-Young Kang and Aesun Yoon and Hyuk-Chul Kwon",
  title           = "Improving partial parsing based on error-pattern
                     analysis for a {Korean} grammar-checker",
  journal         = j-TALIP,
  year            = "2003",
  month           = dec,
  volume          = "2",
  number          = "4",
  pages           = "301--323",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Kim:2003:RRE,
  author          = "Harksoo Kim and Jungyun Seo",
  title           = "Resolution of referring expressions in a {Korean}
                     multimodal dialogue system",
  journal         = j-TALIP,
  year            = "2003",
  month           = dec,
  volume          = "2",
  number          = "4",
  pages           = "324--337",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Mani:2004:ISI,
  author          = "Inderjeet Mani and James Pustejovsky and Beth
                     Sundheim",
  title           = "Introduction to the special issue on temporal
                     information processing",
  journal         = j-TALIP,
  year            = "2004",
  month           = mar,
  volume          = "3",
  number          = "1",
  pages           = "1--10",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Han:2004:FRT,
  author          = "Benjamin Han and Alon Lavie",
  title           = "A framework for resolution of time in natural
                     language",
  journal         = j-TALIP,
  year            = "2004",
  month           = mar,
  volume          = "3",
  number          = "1",
  pages           = "11--32",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Schilder:2004:EMT,
  author          = "Frank Schilder",
  title           = "Extracting meaning from temporal nouns and temporal
                     prepositions",
  journal         = j-TALIP,
  year            = "2004",
  month           = mar,
  volume          = "3",
  number          = "1",
  pages           = "33--50",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Jang:2004:ATT,
  author          = "Seok Bae Jang and Jennifer Baldwin and Inderjeet
                     Mani",
  title           = "Automatic {TIMEX2} tagging of {Korean} news",
  journal         = j-TALIP,
  year            = "2004",
  month           = mar,
  volume          = "3",
  number          = "1",
  pages           = "51--65",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Hobbs:2004:OTS,
  author          = "Jerry R. Hobbs and Feng Pan",
  title           = "An ontology of time for the {Semantic Web}",
  journal         = j-TALIP,
  year            = "2004",
  month           = mar,
  volume          = "3",
  number          = "1",
  pages           = "66--85",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Nov 4 08:37:36 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Gao:2004:ISI,
  author          = "Jianfeng Gao and Chin-Yew Lin",
  title           = "Introduction to the special issue on statistical
                     language modeling",
  journal         = j-TALIP,
  year            = "2004",
  month           = jun,
  volume          = "3",
  number          = "2",
  pages           = "87--93",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Mon Nov 22 06:20:04 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Kim:2004:LTL,
  author          = "Woosung Kim and Sanjeev Khudanpur",
  title           = "Lexical triggers and latent semantic analysis for
                     cross-lingual language model adaptation",
  journal         = j-TALIP,
  year            = "2004",
  month           = jun,
  volume          = "3",
  number          = "2",
  pages           = "94--112",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Mon Nov 22 06:20:04 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Linares:2004:HLM,
  author          = "Diego Linares and Jos{\'e}-Miguel Bened{\'\i} and
                     Joan-Andreu S{\'a}nchez",
  title           = "A hybrid language model based on a combination of
                     {$N$}-grams and stochastic context-free grammars",
  journal         = j-TALIP,
  year            = "2004",
  month           = jun,
  volume          = "3",
  number          = "2",
  pages           = "113--127",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Mon Nov 22 06:20:04 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Chen:2004:DHG,
  author          = "Berlin Chen and Hsin-Min Wang and Lin-Shan Lee",
  title           = "A discriminative {HMM\slash N}-gram-based retrieval
                     approach for {Mandarin} spoken documents",
  journal         = j-TALIP,
  year            = "2004",
  month           = jun,
  volume          = "3",
  number          = "2",
  pages           = "128--145",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Mon Nov 22 06:20:04 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Nguyen:2004:EBS,
  author          = "Minh Le Nguyen and Susumu Horiguchi and Akira Shimazu
                     and Bao Tu Ho",
  title           = "Example-based sentence reduction using the hidden
                     {Markov} model",
  journal         = j-TALIP,
  year            = "2004",
  month           = jun,
  volume          = "3",
  number          = "2",
  pages           = "146--158",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Mon Nov 22 06:20:04 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Fung:2004:MEC,
  author          = "Pascale Fung and Grace Ngai and Yongsheng Yang and
                     Benfeng Chen",
  title           = "A maximum-entropy {Chinese} parser augmented by
                     transformation-based learning",
  journal         = j-TALIP,
  year            = "2004",
  month           = jun,
  volume          = "3",
  number          = "2",
  pages           = "159--168",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Mon Nov 22 06:20:04 MST 2004",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Li:2004:AMF,
  author          = "Yujia Li and Tan Lee and Yao Qian",
  title           = "Analysis and modeling of {F0} contours for {Cantonese}
                     text-to-speech",
  journal         = j-TALIP,
  year            = "2004",
  month           = sep,
  volume          = "3",
  number          = "3",
  pages           = "169--180",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Apr 14 12:20:22 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Huang:2004:UWB,
  author          = "Chien-Chung Huang and Shui-Lung Chuang and Lee-Feng
                     Chien",
  title           = "Using a {Web}-based categorization approach to
                     generate thematic metadata from texts",
  journal         = j-TALIP,
  year            = "2004",
  month           = sep,
  volume          = "3",
  number          = "3",
  pages           = "190--212",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Apr 14 12:20:22 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Myaeng:2004:ISI,
  author          = "Sung Hyon Myaeng",
  title           = "Introduction to the special issue on computer
                     processing of oriental languages",
  journal         = j-TALIP,
  year            = "2004",
  month           = dec,
  volume          = "3",
  number          = "4",
  pages           = "213--213",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Apr 14 12:20:22 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
%%% The source lists these authors family-name-first ("Li Baoli"); they are
%%% entered in "Last, First" form so that BibTeX sorts and labels on the
%%% family names.  The citation key is left unchanged so that existing
%%% citations continue to resolve.
@Article{Baoli:2004:AKN,
  author          = "Li, Baoli and Lu, Qin and Yu, Shiwen",
  title           = "An adaptive {$k$}-nearest neighbor text categorization
                     strategy",
  journal         = j-TALIP,
  year            = "2004",
  month           = dec,
  volume          = "3",
  number          = "4",
  pages           = "215--226",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Apr 14 12:20:22 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Kim:2004:UTI,
  author          = "Pyung Kim and Sung Hyon Myaeng",
  title           = "Usefulness of temporal information automatically
                     extracted from news articles for topic tracking",
  journal         = j-TALIP,
  year            = "2004",
  month           = dec,
  volume          = "3",
  number          = "4",
  pages           = "227--242",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Apr 14 12:20:22 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Zhang:2004:ESS,
  author          = "Le Zhang and Jingbo Zhu and Tianshun Yao",
  title           = "An evaluation of statistical spam filtering
                     techniques",
  journal         = j-TALIP,
  year            = "2004",
  month           = dec,
  volume          = "3",
  number          = "4",
  pages           = "243--269",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Apr 14 12:20:22 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Wu:2005:DSF,
  author          = "Chung-Hsien Wu and Jui-Feng Yeh and Ming-Jun Chen",
  title           = "Domain-specific {FAQ} retrieval using independent
                     aspects",
  journal         = j-TALIP,
  year            = "2005",
  month           = mar,
  volume          = "4",
  number          = "1",
  pages           = "1--17",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jul 7 13:48:21 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Murata:2005:CEV,
  author          = "Masaki Murata and Masao Utiyama and Kiyotaka Uchimoto
                     and Hitoshi Isahara and Qing Ma",
  title           = "Correction of errors in a verb modality corpus for
                     machine translation with a machine-learning method",
  journal         = j-TALIP,
  year            = "2005",
  month           = mar,
  volume          = "4",
  number          = "1",
  pages           = "18--37",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jul 7 13:48:21 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Hendessi:2005:SSP,
  author          = "F. Hendessi and A. Ghayoori and T. A. Gulliver",
  title           = "A speech synthesizer for {Persian} text using a neural
                     network with a smooth ergodic {HMM}",
  journal         = j-TALIP,
  year            = "2005",
  month           = mar,
  volume          = "4",
  number          = "1",
  pages           = "38--52",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jul 7 13:48:21 MDT 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Zhang:2005:COT,
  author          = "Ying Zhang and Phil Vines and Justin Zobel",
  title           = "{Chinese} {OOV} translation and post-translation query
                     expansion in {Chinese--English} cross-lingual
                     information retrieval",
  journal         = j-TALIP,
  year            = "2005",
  month           = jun,
  volume          = "4",
  number          = "2",
  pages           = "57--77",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sat Dec 17 08:07:33 MST 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Qu:2005:TES,
  author          = "Yan Qu and David A. Hull and Gregory Grefenstette and
                     David A. Evans and Motoko Ishikawa and Setsuko Nara and
                     Toshiya Ueda and Daisuke Noda and Kousaku Arita and
                     Yuki Funakoshi and Hiroshi Matsuda",
  title           = "Towards effective strategies for monolingual and
                     bilingual information retrieval: {Lessons} learned from
                     {NTCIR-4}",
  journal         = j-TALIP,
  year            = "2005",
  month           = jun,
  volume          = "4",
  number          = "2",
  pages           = "78--110",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sat Dec 17 08:07:33 MST 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Sakai:2005:FPR,
  author          = "Tetsuya Sakai and Toshihiko Manabe and Makoto Koyama",
  title           = "Flexible pseudo-relevance feedback via selective
                     sampling",
  journal         = j-TALIP,
  year            = "2005",
  month           = jun,
  volume          = "4",
  number          = "2",
  pages           = "111--135",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sat Dec 17 08:07:33 MST 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
%%% The source page range began at 135, which is also the final page of the
%%% preceding article in this issue (Sakai:2005:FPR, 111--135); the range
%%% has been corrected to start at 136.
@Article{Kwok:2005:RRP,
  author          = "Kui Lam Kwok and Sora Choi and Norbert Dinstl",
  title           = "Rich results from poor resources: {NTCIR-4}
                     monolingual and cross-lingual retrieval of {Korean}
                     texts using {Chinese} and {English}",
  journal         = j-TALIP,
  year            = "2005",
  month           = jun,
  volume          = "4",
  number          = "2",
  pages           = "136--158",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sat Dec 17 08:07:33 MST 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Savoy:2005:CSM,
  author          = "Jacques Savoy",
  title           = "Comparative study of monolingual and multilingual
                     search models for use with {Asian} languages",
  journal         = j-TALIP,
  year            = "2005",
  month           = jun,
  volume          = "4",
  number          = "2",
  pages           = "159--185",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sat Dec 17 08:07:33 MST 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Mase:2005:PTS,
  author          = "Hisao Mase and Tadataka Matsubayashi and Yuichi Ogawa
                     and Makoto Iwayama and Tadaaki Oshio",
  title           = "Proposal of two-stage patent retrieval method
                     considering the claim structure",
  journal         = j-TALIP,
  year            = "2005",
  month           = jun,
  volume          = "4",
  number          = "2",
  pages           = "186--202",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Sat Dec 17 08:07:33 MST 2005",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Nakagawa:2005:PSI,
  author          = "Hiroshi Nakagawa and Tatsunori Mori and Noriko Kando",
  title           = "Preface to the special issues on {NTCIR-4}",
  journal         = j-TALIP,
  year            = "2005",
  month           = sep,
  volume          = "4",
  number          = "3",
  pages           = "237--242",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jan 26 08:28:41 MST 2006",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Kato:2005:ODQ,
  author          = "Tsuneaki Kato and Jun'ichi Fukumoto and Fumito Masui
                     and Noriko Kando",
  title           = "Are open-domain question answering technologies useful
                     for information access dialogues?---an empirical study
                     and a proposal of a novel challenge",
  journal         = j-TALIP,
  year            = "2005",
  month           = sep,
  volume          = "4",
  number          = "3",
  pages           = "243--262",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jan 26 08:28:41 MST 2006",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Isozaki:2005:AHP,
  author          = "Hideki Isozaki",
  title           = "An analysis of a high-performance {Japanese} question
                     answering system",
  journal         = j-TALIP,
  year            = "2005",
  month           = sep,
  volume          = "4",
  number          = "3",
  pages           = "263--279",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jan 26 08:28:41 MST 2006",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Mori:2005:JQA,
  author          = "Tatsunori Mori",
  title           = "{Japanese} question-answering system using {A*} search
                     and its improvement",
  journal         = j-TALIP,
  year            = "2005",
  month           = sep,
  volume          = "4",
  number          = "3",
  pages           = "280--304",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jan 26 08:28:41 MST 2006",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Mori:2005:MAF,
  author          = "Tatsunori Mori and Masanori Nozawa and Yoshiaki
                     Asada",
  title           = "Multi-answer-focused multi-document summarization
                     using a question-answering engine",
  journal         = j-TALIP,
  year            = "2005",
  month           = sep,
  volume          = "4",
  number          = "3",
  pages           = "305--320",
  ISSN            = "1530-0226",
  CODEN           = "????",
  bibdate         = "Thu Jan 26 08:28:41 MST 2006",
  bibsource       = "http://portal.acm.org/",
  acknowledgement = ack-nhfb,
}
@Article{Okazaki:2005:ICO,
author = "Naoaki Okazaki and Yutaka Matsuo and Mitsuru
Ishizuka",
title = "Improving chronological ordering of sentences
extracted from multiple newspaper articles",
journal = j-TALIP,
volume = "4",
number = "3",
pages = "321--339",
month = sep,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Jan 26 08:28:41 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Yoshioka:2005:CPB,
author = "Masaharu Yoshioka and Makoto Haraguchi",
title = "On a combination of probabilistic and {Boolean} {IR}
models for {WWW} document retrieval",
journal = j-TALIP,
volume = "4",
number = "3",
pages = "340--356",
month = sep,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Jan 26 08:28:41 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Lingpeng:2005:CIR,
author = "Yang Lingpeng and Ji Donghong and Tang Li and Niu
Zhengyu",
title = "{Chinese} information retrieval based on terms and
relevant terms",
journal = j-TALIP,
volume = "4",
number = "3",
pages = "357--374",
month = sep,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Jan 26 08:28:41 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
%%% Volume 4, number 4 is the December 2005 issue: number 3 of this
%%% volume is dated September 2005, and the bibdate below predates
%%% December 2006.  The citation label keeps its 2006 stem so that
%%% existing citations continue to resolve.
@Article{Sakai:2006:ISI,
author = "Tetsuya Sakai and Yuji Matsumoto",
title = "Introduction to the special issue: {Recent} advances
in information processing and access for {Japanese}",
journal = j-TALIP,
volume = "4",
number = "4",
pages = "375--376",
month = dec,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Feb 16 10:54:02 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
%%% Volume 4, number 4 is the December 2005 issue: number 3 of this
%%% volume is dated September 2005, and the bibdate below predates
%%% December 2006.  The citation label keeps its 2006 stem so that
%%% existing citations continue to resolve.
@Article{Doi:2006:EBM,
author = "Takao Doi and Hirofumi Yamamoto and Eiichiro Sumita",
title = "Example-based machine translation using efficient
sentence retrieval based on edit-distance",
journal = j-TALIP,
volume = "4",
number = "4",
pages = "377--399",
month = dec,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Feb 16 10:54:02 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
%%% Volume 4, number 4 is the December 2005 issue: number 3 of this
%%% volume is dated September 2005, and the bibdate below predates
%%% December 2006.  The citation label keeps its 2006 stem so that
%%% existing citations continue to resolve.
@Article{Tomiura:2006:ESS,
author = "Yoichi Tomiura and Shosaku Tanaka and Toru Hitaka",
title = "Estimating satisfactoriness of selectional restriction
from corpus without a thesaurus",
journal = j-TALIP,
volume = "4",
number = "4",
pages = "400--416",
month = dec,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Feb 16 10:54:02 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
%%% Volume 4, number 4 is the December 2005 issue: number 3 of this
%%% volume is dated September 2005, and the bibdate below predates
%%% December 2006.  The citation label keeps its 2006 stem so that
%%% existing citations continue to resolve.
@Article{Iida:2006:ARA,
author = "Ryu Iida and Kentaro Inui and Yuji Matsumoto",
title = "Anaphora resolution by antecedent identification
followed by anaphoricity determination",
journal = j-TALIP,
volume = "4",
number = "4",
pages = "417--434",
month = dec,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Feb 16 10:54:02 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
%%% Volume 4, number 4 is the December 2005 issue: number 3 of this
%%% volume is dated September 2005, and the bibdate below predates
%%% December 2006.  The citation label keeps its 2006 stem so that
%%% existing citations continue to resolve.
@Article{Inui:2006:ACK,
author = "Takashi Inui and Kentaro Inui and Yuji Matsumoto",
title = "Acquiring causal knowledge from text using the
connective marker {\em tame\/}",
journal = j-TALIP,
volume = "4",
number = "4",
pages = "435--474",
month = dec,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Feb 16 10:54:02 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
%%% Volume 4, number 4 is the December 2005 issue: number 3 of this
%%% volume is dated September 2005, and the bibdate below predates
%%% December 2006.  The citation label keeps its 2006 stem so that
%%% existing citations continue to resolve.
@Article{Ma:2006:TSB,
author = "Qiang Ma and Katsumi Tanaka",
title = "Topic-structure-based complementary information
retrieval and its application",
journal = j-TALIP,
volume = "4",
number = "4",
pages = "475--503",
month = dec,
year = "2005",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu Feb 16 10:54:02 MST 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
%%% NOTE(review): the leading {AUTHOR}: in the title below looks like a
%%% publisher-site placeholder rather than part of the real title.
%%% Confirm against the printed issue before changing it; the ATM
%%% suffix of the citation label appears to be derived from it.
@Article{Park:2006:ATM,
author = "Jong C. Park and Gary Geunbae Lee and Limsoon Wong",
title = "{AUTHOR}: {Text} mining and management in
biomedicine",
journal = j-TALIP,
volume = "5",
number = "1",
pages = "1--3",
month = mar,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu May 11 11:29:25 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Park:2006:MBB,
author = "Kyung-Mi Park and Seon-Ho Kim and Hae-Chang Rim and
Young-Sook Hwang",
title = "{ME}-based biomedical named entity recognition using
lexical knowledge",
journal = j-TALIP,
volume = "5",
number = "1",
pages = "4--21",
month = mar,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu May 11 11:29:25 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Nenadic:2006:MSR,
author = "Goran Nenadi{\'c} and Sophia Ananiadou",
title = "Mining semantically related terms from biomedical
literature",
journal = j-TALIP,
volume = "5",
number = "1",
pages = "22--43",
month = mar,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu May 11 11:29:25 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Kim:2006:ECI,
author = "Jung-Jae Kim and Jong C. Park",
title = "Extracting contrastive information from negation
patterns in biomedical literature",
journal = j-TALIP,
volume = "5",
number = "1",
pages = "44--60",
month = mar,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu May 11 11:29:25 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Kim:2006:TPL,
author = "Eunju Kim and Yu Song and Cheongjae Lee and Kyoungduk
Kim and Gary Geunbae Lee and Byoung-Kee Yi and Jeongwon
Cha",
title = "Two-phase learning for biological event extraction and
verification",
journal = j-TALIP,
volume = "5",
number = "1",
pages = "61--73",
month = mar,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu May 11 11:29:25 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Mima:2006:TBK,
author = "Hideki Mima and Sophia Ananiadou and Katsumori
Matsushima",
title = "Terminology-based knowledge mining for new knowledge
discovery",
journal = j-TALIP,
volume = "5",
number = "1",
pages = "74--88",
month = mar,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Thu May 11 11:29:25 MDT 2006",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Carpuat:2006:AWS,
author = "Marine Carpuat and Pascale Fung and Grace Ngai",
title = "Aligning word senses using bilingual corpora",
journal = j-TALIP,
volume = "5",
number = "2",
pages = "89--120",
month = jun,
year = "2006",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1165255.1165256",
ISSN = "1530-0226",
bibdate = "Thu Oct 5 07:00:29 MDT 2006",
bibsource = "http://portal.acm.org/",
abstract = "The growing importance of multilingual information
retrieval and machine translation has made multilingual
ontologies extremely valuable resources. Since the
construction of an ontology from scratch is a very
expensive and time-consuming undertaking, it is
attractive to consider ways of automatically aligning
monolingual ontologies, which already exist for many of
the world's major languages. Previous research
exploited similarity in the structure of the ontologies
to align, or manually created bilingual resources.
These approaches cannot be used to align ontologies
with vastly different structures and can only be
applied to much studied language pairs for which
expensive resources are already available. In this
paper, we propose a novel approach to align the
ontologies at the node level: Given a concept
represented by a particular word sense in one ontology,
our task is to find the best corresponding word sense
in the second language ontology. To this end, we
present a language-independent, corpus-based method
that borrows from techniques used in information
retrieval and machine translation. We show its
efficiency by applying it to two very different
ontologies in very different languages: the Mandarin
Chinese HowNet and the American English WordNet.
Moreover, we propose a methodology to measure bilingual
corpora comparability and show that our method is
robust enough to use noisy nonparallel bilingual
corpora efficiently, when clean parallel corpora are
not available.",
acknowledgement = ack-nhfb,
}
@Article{Lee:2006:ABN,
author = "Chun-Jen Lee and Jason S. Chang and Jyh-Shing R.
Jang",
title = "Alignment of bilingual named entities in parallel
corpora using statistical models and multiple knowledge
sources",
journal = j-TALIP,
volume = "5",
number = "2",
pages = "121--145",
month = jun,
year = "2006",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1165255.1165257",
ISSN = "1530-0226",
bibdate = "Thu Oct 5 07:00:29 MDT 2006",
bibsource = "http://portal.acm.org/",
abstract = "Named entity (NE) extraction is one of the fundamental
tasks in natural language processing (NLP). Although
many studies have focused on identifying NEs within
monolingual documents, aligning NEs in bilingual
documents has not been investigated extensively due to
the complexity of the task. In this article we
introduce a new approach to aligning bilingual NEs in
parallel corpora by incorporating statistical models
with multiple knowledge sources. In our approach, we
model the process of translating an English NE phrase
into a Chinese equivalent using lexical
translation\slash transliteration probabilities for
word translation and alignment probabilities for word
reordering. The method involves automatically learning
phrase alignment and acquiring word translations from a
bilingual phrase dictionary and parallel corpora, and
automatically discovering transliteration
transformations from a training set of
name-transliteration pairs. The method also involves
language-specific knowledge functions, including
handling abbreviations, recognizing Chinese personal
names, and expanding acronyms. At runtime, the proposed
models are applied to each source NE in a pair of
bilingual sentences to generate and evaluate the target
NE candidates; the source and target NEs are then
aligned based on the computed
probabilities. Experimental results demonstrate that
the proposed approach, which integrates statistical
models with extra knowledge sources, is highly feasible
and offers significant improvement in performance
compared to our previous work, as well as the
traditional approach of IBM Model 4.",
acknowledgement = ack-nhfb,
}
@Article{Shirado:2006:UJH,
author = "Tamotsu Shirado and Satoko Marumoto and Masaki Murata
and Hitoshi Isahara",
title = "Using {Japanese} honorific expressions: {A}
psychological study",
journal = j-TALIP,
volume = "5",
number = "2",
pages = "146--164",
month = jun,
year = "2006",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1165255.1165258",
ISSN = "1530-0226",
bibdate = "Thu Oct 5 07:00:29 MDT 2006",
bibsource = "http://portal.acm.org/",
abstract = "We investigated, via experiment, knowledge of
normative honorific expressions as used in textbooks
and in practice by people. Forty subjects divided into
four groups according to age (younger\slash older) and
gender (male\slash female) participated in the
experiments. The results show that knowledge about the
use of normative honorific expressions in textbooks is
similar to that demonstrated by the younger subject
groups, but differed from that of the older subject
groups. The knowledge of the older subjects was more
complex than that shown in textbooks or demonstrated by
the younger subjects. A model that can identify misuse
of honorific expressions in sentences is the framework
for this investigation. The model is minimal, but could
represent 76\% to 92\% of the subjects' knowledge
regarding each honorific element. This model will be
useful in the development of computer-aided systems to
help teach how honorific expressions should be used.",
acknowledgement = ack-nhfb,
}
@Article{Wu:2006:ERT,
author = "Chung-Hsien Wu and Ze-Jing Chuang and Yu-Chung Lin",
title = "Emotion recognition from text using semantic labels
and separable mixture models",
journal = j-TALIP,
volume = "5",
number = "2",
pages = "165--183",
month = jun,
year = "2006",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1165255.1165259",
ISSN = "1530-0226",
bibdate = "Thu Oct 5 07:00:29 MDT 2006",
bibsource = "http://portal.acm.org/",
abstract = "This study presents a novel approach to automatic
emotion recognition from text. First, emotion
generation rules (EGRs) are manually deduced from
psychology to represent the conditions for generating
emotion. Based on the EGRs, the emotional state of each
sentence can be represented as a sequence of semantic
labels (SLs) and attributes (ATTs); SLs are defined as
the domain-independent features, while ATTs are
domain-dependent. The emotion association rules (EARs)
represented by SLs and ATTs for each emotion are
automatically derived from the sentences in an
emotional text corpus using the a priori algorithm.
Finally, a separable mixture model (SMM) is adopted to
estimate the similarity between an input sentence and
the EARs of each emotional state. Since some features
defined in this approach are domain-dependent, a dialog
system focusing on the students' daily expressions is
constructed, and only three emotional states, happy,
unhappy, and neutral, are considered for performance
evaluation. According to the results of the
experiments, given the domain corpus, the proposed
approach is promising, and easily ported into other
domains.",
acknowledgement = ack-nhfb,
}
%%% NOTE(review): this page range starts at 183, which is also the last
%%% page of the preceding article (Wu:2006:ERT, pages 165--183), while
%%% the following article starts at 185.  One of the two ranges may be
%%% off by one; confirm against the printed issue.
@Article{Dale:2006:ISS,
author = "Robert Dale",
title = "Introduction to the {Special} section: {Extended} best
papers from {IJCNLP 2005}",
journal = j-TALIP,
volume = "5",
number = "3",
pages = "183--184",
month = sep,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:36 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Oh:2006:MTM,
author = "Jong-Hoon Oh and Key-Sun Choi and Hitoshi Isahara",
title = "A machine transliteration model based on
correspondence between graphemes and phonemes",
journal = j-TALIP,
volume = "5",
number = "3",
pages = "185--208",
month = sep,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:36 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gao:2006:ESL,
author = "Jianfeng Gao and Hisami Suzuki and Wei Yuan",
title = "An empirical study on language model adaptation",
journal = j-TALIP,
volume = "5",
number = "3",
pages = "209--227",
month = sep,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:36 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Ye:2006:SRL,
author = "Patrick Ye and Timothy Baldwin",
title = "Semantic role labeling of prepositional phrases",
journal = j-TALIP,
volume = "5",
number = "3",
pages = "228--244",
month = sep,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:36 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Chung:2006:APD,
author = "Tze Leung Chung and Robert Wing Pong Luk and Kam Fai
Wong and Kui Lam Kwok and Dik Lun Lee",
title = "Adapting pivoted document-length normalization for
query size: {Experiments} in {Chinese} and {English}",
journal = j-TALIP,
volume = "5",
number = "3",
pages = "245--263",
month = sep,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:36 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Matsumura:2006:ERB,
author = "Atsushi Matsumura and Atsuhiro Takasu and Jun Adachi",
title = "Effect of relationships between words on {Japanese}
information retrieval",
journal = j-TALIP,
volume = "5",
number = "3",
pages = "264--289",
month = sep,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:36 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Song:2006:ISI,
author = "Dawei Song and Jian-Yun Nie",
title = "Introduction to special issue on reasoning in natural
language information processing",
journal = j-TALIP,
volume = "5",
number = "4",
pages = "291--295",
month = dec,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Nie:2006:ILM,
author = "Jian-Yun Nie and Guihong Cao and Jing Bai",
title = "Inferential language models for information
retrieval",
journal = j-TALIP,
volume = "5",
number = "4",
pages = "296--322",
month = dec,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Gao:2006:SQT,
author = "Jianfeng Gao and Jian-Yun Nie and Ming Zhou",
title = "Statistical query translation models for
cross-language information retrieval",
journal = j-TALIP,
volume = "5",
number = "4",
pages = "323--359",
month = dec,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Liu:2006:SFQ,
author = "Yi Liu and Rong Jin and Joyce Y. Chai",
title = "A statistical framework for query translation
disambiguation",
journal = j-TALIP,
volume = "5",
number = "4",
pages = "360--387",
month = dec,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Li:2006:TTT,
author = "Baoli Li and Wenjie Li and Qin Lu",
title = "Topic tracking with time granularity reasoning",
journal = j-TALIP,
volume = "5",
number = "4",
pages = "388--412",
month = dec,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Phan:2006:IDS,
author = "Xuan-Hieu Phan and Le-Minh Nguyen and Yasushi Inoguchi
and Tu-Bao Ho and Susumu Horiguchi",
title = "Improving discriminative sequential learning by
discovering important association of statistics",
journal = j-TALIP,
volume = "5",
number = "4",
pages = "413--438",
month = dec,
year = "2006",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
}
@Article{Chen:2007:UDM,
author = "Yong Chen and Kwok-Ping Chan",
title = "Using data mining techniques and rough set theory for
language modeling",
journal = j-TALIP,
volume = "6",
number = "1",
pages = "??--??",
month = apr,
year = "2007",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
articleno = "2",
}
@Article{Hsu:2007:MSB,
author = "Chung-Chian Hsu and Chien-Hsing Chen and Tien-Teng
Shih and Chun-Kai Chen",
title = "Measuring similarity between transliterations against
noise data",
journal = j-TALIP,
volume = "6",
number = "1",
pages = "??--??",
month = apr,
year = "2007",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
articleno = "5",
}
@Article{Sakai:2007:RFQ,
author = "Tetsuya Sakai",
title = "On the reliability of factoid question answering
evaluation",
journal = j-TALIP,
volume = "6",
number = "1",
pages = "??--??",
month = apr,
year = "2007",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
articleno = "3",
}
@Article{Wiseman:2007:CBC,
author = "Yair Wiseman and Irit Gefner",
title = "Conjugation-based compression for {Hebrew} texts",
journal = j-TALIP,
volume = "6",
number = "1",
pages = "??--??",
month = apr,
year = "2007",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
articleno = "4",
}
@Article{Wu:2007:TBS,
author = "Chung-Hsien Wu and Hung-Yu Su and Yu-Hsien Chiu and
Chia-Hung Lin",
title = "Transfer-based statistical translation of {Taiwanese}
sign language using {PCFG}",
journal = j-TALIP,
volume = "6",
number = "1",
pages = "??--??",
month = apr,
year = "2007",
CODEN = "????",
ISSN = "1530-0226",
bibdate = "Sat Apr 14 10:21:37 MDT 2007",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
articleno = "1",
}
%%% The variables n and k in the abstract are set in math mode and
%%% closed up to their hyphens, matching $F$-measure in the same
%%% abstract.
@Article{Kuo:2007:PSM,
author = "Jin-Shea Kuo and Haizhou Li and Ying-Kuei Yang",
title = "A phonetic similarity model for automatic extraction
of transliteration pairs",
journal = j-TALIP,
volume = "6",
number = "2",
pages = "6:1--6:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1282080.1282081",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:28 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This article proposes an approach for the automatic
extraction of transliteration pairs from Chinese Web
corpora. In this approach, we formulate the machine
transliteration process using a syllable-based phonetic
similarity model which consists of phonetic confusion
matrices and a Chinese character $n$-gram language
model. With the phonetic similarity model, the
extraction of transliteration pairs becomes a two-step
process of recognition followed by validation: First,
in the recognition process, we identify the most
probable transliteration in the $k$-neighborhood of a
recognized English word. Then, in the validation
process, we qualify the transliteration pair candidates
with a hypothesis test. We carry out an analytical
study on the statistics of several key factors in
English-Chinese transliteration to help formulate
phonetic similarity modeling. We then conduct both
supervised and unsupervised learning of a phonetic
similarity model on a development database. The
experimental results validate the effectiveness of the
phonetic similarity model by achieving an $F$-measure of
0.739 in supervised learning. The unsupervised learning
approach works almost as well as the supervised one,
thus allowing us to deploy automatic extraction of
transliteration pairs in the Web space.",
acknowledgement = ack-nhfb,
articleno = "6",
keywords = "extraction of transliteration pairs; machine
translation; machine transliteration; phonetic
confusion probability; phonetic similarity modeling",
}
@Article{Xiao:2007:SNM,
author = "Jinghui Xiao and Xiaolong Wang and Bingquan Liu",
title = "The study of a nonstationary maximum entropy {Markov}
model and its application on the pos-tagging task",
journal = j-TALIP,
volume = "6",
number = "2",
pages = "7:1--7:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1282080.1282082",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:28 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Sequence labeling is a core task in natural language
processing. The maximum entropy Markov model (MEMM) is
a powerful tool in performing this task. This article
enhances the traditional MEMM by exploiting the
positional information of language elements. The
stationary hypothesis is relaxed in MEMM, and the
nonstationary MEMM (NS-MEMM) is proposed. Several
related issues are discussed in detail, including the
representation of positional information, NS-MEMM
implementation, smoothing techniques, and the space
complexity issue. Furthermore, the asymmetric NS-MEMM
presents a more flexible way to exploit positional
information. In the experiments, NS-MEMM is evaluated
on both the Chinese and the English pos-tagging tasks.
According to the experimental results, NS-MEMM yields
effective improvements over MEMM by exploiting
positional information. The smoothing techniques in
this article effectively solve the NS-MEMM
data-sparseness problem; the asymmetric NS-MEMM is also
an improvement by exploiting positional information in
a more flexible way.",
acknowledgement = ack-nhfb,
articleno = "7",
keywords = "data sparseness problem; Markov property; MEMM;
pos-tagging; stationary hypothesis",
}
%%% The first author's given name is Yi (it had been mistyped with a
%%% lowercase letter l).  Stray spaces after the hyphens in two
%%% compound modifiers in the abstract have been closed up.
@Article{Zhuang:2007:IHD,
author = "Yi Zhuang and Yueting Zhuang and Qing Li and Lei
Chen",
title = "Interactive high-dimensional index for large {Chinese}
calligraphic character databases",
journal = j-TALIP,
volume = "6",
number = "2",
pages = "8:1--8:??",
month = sep,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1282080.1282083",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:28 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The large numbers of Chinese calligraphic scripts in
existence are valuable part of the Chinese cultural
heritage. However, due to the shape complexity of these
characters, it is hard to employ existing techniques to
effectively retrieve and efficiently index them. In
this article, using a novel shape-similarity-based
retrieval method in which shapes of calligraphic
characters are represented by their contour points
extracted from the character images, we propose an
interactive partial-distance-map (PDM)-based
high-dimensional indexing scheme which is designed
specifically to speed up the retrieval performance of
the large Chinese calligraphic character databases
effectively. Specifically, we use the approximate
minimal bounding sphere of a query character and
utilize users' relevance feedback to refine the query
gradually. Comprehensive experiments are conducted to
testify the efficiency and effectiveness of this
method. In addition, a new $k$-NN search called Pseudo
$k$-NN (P $k$-NN) search is presented to better
facilitate the PDM-based character retrieval.",
acknowledgement = ack-nhfb,
articleno = "8",
keywords = "Chinese calligraphic character; hyper-centre
relocation; Pseudo k-NN",
}
@Article{Saraswathi:2007:CPE,
author = "S. Saraswathi and T. V. Geetha",
title = "Comparison of performance of enhanced morpheme-based
language model with different word-based language
models for improving the performance of {Tamil} speech
recognition system",
journal = j-TALIP,
volume = "6",
number = "3",
pages = "9:1--9:??",
month = nov,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1290002.1290003",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:45 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "This paper describes a new technique of language
modeling for a highly inflectional Dravidian language,
Tamil. It aims to alleviate the main problems
encountered in processing of Tamil language, like
enormous vocabulary growth caused by the large number
of different forms derived from one word. The size of
the vocabulary was reduced by, decomposing the words
into stems and endings and storing these sub word units
(morphemes) in the vocabulary separately. A enhanced
morpheme-based language model was designed for the
inflectional language Tamil. The enhanced
morpheme-based language model was trained on the
decomposed corpus. The perplexity and Word Error Rate
(WER) were obtained to check the efficiency of the
model for Tamil speech recognition system. The results
were compared with word-based bigram and trigram
language models, distance based language model,
dependency based language model and class based
language model. From the results it was analyzed that
the enhanced morpheme-based trigram model with Katz
back-off smoothing effect improved the performance of
the Tamil speech recognition system when compared to
the word-based language models.",
acknowledgement = ack-nhfb,
articleno = "9",
keywords = "language model; morphemes; perplexity; word error rate
and speech recognition",
}
@Article{Hussain:2007:DLS,
author = "Sarmad Hussain and Sana Gul and Afifah Waseem",
title = "Developing lexicographic sorting: {An} example for
{Urdu}",
journal = j-TALIP,
volume = "6",
number = "3",
pages = "10:1--10:??",
month = nov,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1290002.1290004",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:45 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Collation or lexicographic sorting is essential to
develop multilingual computing. This paper presents the
challenges faced in developing collation sequence for a
language. The paper discusses both theoretical
linguistic and practical standardization and encoding
related considerations that need to be addressed for
languages for which relevant standards and/or solutions
have not been defined. The paper also defines the
process, by giving the details of the procedure
followed for Urdu language, which is the national
language of Pakistan and is spoken by more than 100
million people across the world. The paper is oriented
towards organizations involved in developing and using
collation standards and the localization industry, and
not focused on theoretical issues.",
acknowledgement = ack-nhfb,
articleno = "10",
keywords = "text processing; Urdu",
}
@Article{Fukumoto:2007:TTB,
author = "Fumiyo Fukumoto and Yoshimi Suzuki",
title = "Topic tracking based on bilingual comparable corpora
and semisupervised clustering",
journal = j-TALIP,
volume = "6",
number = "3",
pages = "11:1--11:??",
month = nov,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1290002.1290005",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:45 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "In this paper, we address the problem of skewed data
in topic tracking: the small number of stories labeled
positive as compared to negative stories and propose a
method for estimating effective training stories for
the topic-tracking task. For a small number of labeled
positive stories, we use bilingual comparable, i.e.,
English, and Japanese corpora, together with the EDR
bilingual dictionary, and extract story pairs
consisting of positive and associated stories. To
overcome the problem of a large number of labeled
negative stories, we classified them into clusters.
This is done using a semisupervised clustering
algorithm, combining $k$ means with EM. The method was
tested on the TDT English corpus and the results showed
that the system works well when the topic under
tracking is talking about an event originating in the
source language country, even for a small number of
initial positive training stories.",
acknowledgement = ack-nhfb,
articleno = "11",
keywords = "bilingual comparable corpora; clustering; EM
algorithm; N-gram model; topic detection and tracking",
}
%%% The abstract's "zero-pronouns" had lost its hyphen in extraction;
%%% it is hyphenated like "zero-anaphora" elsewhere in the abstract.
@Article{Iida:2007:ZAR,
author = "Ryu Iida and Kentaro Inui and Yuji Matsumoto",
title = "Zero-anaphora resolution by learning rich syntactic
pattern features",
journal = j-TALIP,
volume = "6",
number = "4",
pages = "1:1--1:22",
month = dec,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1316457.1316458",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "We approach the zero-anaphora resolution problem by
decomposing it into intrasentential and intersentential
zero-anaphora resolution tasks. For the former task,
syntactic patterns of zero-pronouns and their
antecedents are useful clues. Taking Japanese as a
target language, we empirically demonstrate that
incorporating rich syntactic pattern features in a
state-of-the-art learning-based anaphora resolution
model dramatically improves the accuracy of
intrasentential zero-anaphora, which consequently
improves the overall performance of zero-anaphora
resolution.",
acknowledgement = ack-nhfb,
}
%%% The DOI suffix follows the article order within this issue
%%% (...458, ...459, ...460); it previously duplicated the DOI of the
%%% first article of the issue (Iida:2007:ZAR).
@Article{Adriani:2007:SIC,
author = "Mirna Adriani and Jelita Asian and Bobby Nazief and S.
M. M. Tahaghoghi and Hugh E. Williams",
title = "Stemming {Indonesian}: {A} confix-stripping approach",
journal = j-TALIP,
volume = "6",
number = "4",
pages = "2:1--2:33",
month = dec,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1316457.1316459",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Stemming words to (usually) remove suffixes has
applications in text search, machine translation,
document summarization, and text classification. For
example, English stemming reduces the words 'computer,'
'computing,' 'computation,' and 'computability' to
their common morphological root, 'comput-.' In text
search, this permits a search for 'computers' to find
documents containing all words with the stem 'comput-.'
In the Indonesian language, stemming is of crucial
importance: words have prefixes, suffixes, infixes, and
confixes that make matching related words
difficult.\par
This work surveys existing techniques for stemming
Indonesian words to their morphological roots, presents
our novel and highly accurate CS algorithm, and
explores the effectiveness of stemming in the context
of general-purpose text information retrieval through
ad hoc queries.",
acknowledgement = ack-nhfb,
keywords = "Indonesian; information retrieval; stemming",
}
%%% Keywords are a semicolon-separated list, one phrase per item.
@Article{Thao:2007:NER,
author = "Pham Thi Xuan Thao and Tran Quoc Tri and Dinh Dien and
Nigel Collier",
title = "Named entity recognition in {Vietnamese} using
classifier voting",
journal = j-TALIP,
volume = "6",
number = "4",
pages = "3:1--3:18",
month = dec,
year = "2007",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1316457.1316460",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:11:55 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Named entity recognition (NER) is one of the
fundamental tasks in natural-language processing (NLP).
Though the combination of different classifiers has
been widely applied in several well-studied languages,
this is the first time this method has been applied to
Vietnamese. In this article, we describe how voting
techniques can improve the performance of Vietnamese
NER. By combining several state-of-the-art
machine-learning algorithms using voting strategies,
our final result outperforms individual algorithms and
gained an $F$-measure of 89.12. A detailed discussion
about the challenges of NER in Vietnamese is also
presented.",
acknowledgement = ack-nhfb,
keywords = "C4.5; Conditional Random Fields; Na{\"\i}ve Bayes;
named entity recognition; support vector machines;
transformation based learning; Vietnamese; voting",
}
%%% TALIP volume 7, number 1 (February 2008), article 1.  The `??' in
%%% the pages field presumably marks a final page number that was not
%%% available when the entry was made.
@Article{Chen:2008:SBM,
author = {Yufeng Chen and Chengqing Zong},
title = {A Structure-Based Model for {Chinese} Organization
Name Translation},
journal = j-TALIP,
volume = {7},
number = {1},
pages = {1:1--1:??},
month = feb,
year = {2008},
CODEN = {????},
DOI = {http://doi.acm.org/10.1145/1330291.1330292},
ISSN = {1530-0226},
bibdate = {Mon Jun 16 17:12:10 MDT 2008},
bibsource = {http://portal.acm.org/},
abstract = {Named entity (NE) translation is a fundamental task in
multilingual natural language processing. The
performance of a machine translation system depends
heavily on precise translation of the inclusive NEs.
Furthermore, organization name (ON) is the most complex
NE for translation among all the NEs. In this article,
the structure formulation of ONs is investigated and a
hierarchical structure-based ON translation model for
Chinese-to-English translation system is
presented.\par
First, the model performs ON chunking; then both the
translation of words within chunks and the process of
chunk-reordering are achieved by synchronous
context-free grammar (CFG). The CFG rules are extracted
from bilingual ON pairs in a training program.\par
The main contributions of this article are: (1)
defining appropriate chunk-units for analyzing the
internal structure of Chinese ONs; (2) making the
chunk-based ON translation feasible and flexible via a
hierarchical CFG derivation; and (3) proposing a
training architecture to automatically learn the
synchronous CFG for constructing ONs with chunk-units
from aligned bilingual ON pairs. The experiments show
that the proposed approach translates the Chinese ONs
into English with an accuracy of 93.75\% and
significantly improves the performance of a baseline
statistical machine translation (SMT) system.},
acknowledgement = ack-nhfb,
articleno = {1},
keywords = {alignment; chunk; hierarchical derivation; machine
translation; named entity; organization name; rules
extraction; structural analysis; synchronous
context-free grammar},
}
%%% TALIP volume 7, number 1 (February 2008), article 2.
@Article{Jeong:2008:ISR,
author = "Minwoo Jeong and Gary Geunbae Lee",
title = "Improving Speech Recognition and Understanding
using Error-Corrective Reranking",
journal = j-TALIP,
volume = "7",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2008",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1330291.1330293",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:12:10 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "The main issues of practical spoken-language
applications for human-computer interface are how to
overcome speech recognition errors and guarantee the
reasonable end-performance of spoken-language
applications. Therefore, handling the erroneously
recognized outputs is a key in developing robust
spoken-language systems. To address this problem, we
present a method to improve the accuracy of speech
recognition and performance of spoken-language
applications. The proposed error corrective reranking
approach exploits recognition environment
characteristics and domain-specific semantic
information to provide robustness and adaptability for
a spoken-language system. We demonstrate some
experiments of spoken dialogue tasks and empirical
results that show an improvement in accuracy for both
speech recognition and spoken-language understanding.
In our experiment, we show an error reduction of up to
9.7\% and 16.8\% of word error rate, and 5.5\% and
7.9\% of understanding error for the air travel and
telebanking service domains.",
acknowledgement = ack-nhfb,
articleno = "2",
keywords = "automatic speech recognition; error-corrective
reranking; improving spoken dialogue system;
spoken-language understanding",
}
%%% TALIP volume 7, number 1 (February 2008), article 3.
@Article{Kuo:2008:MSG,
author = {June-Jei Kuo and Hsin-Hsi Chen},
title = {Multidocument Summary Generation: Using
Informative and Event Words},
journal = j-TALIP,
volume = {7},
number = {1},
pages = {3:1--3:??},
month = feb,
year = {2008},
CODEN = {????},
DOI = {http://doi.acm.org/10.1145/1330291.1330294},
ISSN = {1530-0226},
bibdate = {Mon Jun 16 17:12:10 MDT 2008},
bibsource = {http://portal.acm.org/},
abstract = {Summary generation for multiple documents poses a
number of issues including sentence selection, sentence
ordering, and sentence reduction over single-document
summarization. In addition, the temporal resolution
among extracted sentences is also important. This
article considers informative words and event words to
deal with multidocument summarization. These words
indicate the important concepts and relationships in a
document or among a set of documents, and can be used
to select salient sentences. We present a temporal
resolution algorithm, using focusing time and
coreference chains, to convert Chinese temporal
expressions in a document into calendrical forms.
Moreover, we consider the last calendrical form of a
sentence as a sentence time stamp to address sentence
ordering. Informative words, event words, and temporal
words are introduced to a sentence reduction algorithm,
which deals with both length constraints and
information coverage. Experiments on Chinese-news data
sets show significant improvements of both information
coverage and readability.},
acknowledgement = ack-nhfb,
articleno = {3},
keywords = {latent semantic analysis; multidocument summary
generation; sentence ordering; sentence reduction;
sentence selection; temporal processing},
}
%%% This entry carries no abstract or keywords fields.  Only the
%%% acronym in the title is brace-protected, so that bibliography
%%% styles may recase the remaining words as they do for other titles.
@Article{Kando:2008:INS,
author = "Noriko Kando and Teruko Mitamura and Tetsuya Sakai",
title = "Introduction to the {NTCIR-6} Special Issue",
journal = j-TALIP,
volume = "7",
number = "2",
pages = "4:1--4:??",
month = jun,
year = "2008",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1362782.1362783",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:12:23 MDT 2008",
bibsource = "http://portal.acm.org/",
acknowledgement = ack-nhfb,
articleno = "4",
}
%%% TALIP volume 7, number 2 (June 2008), article 5.
@Article{Zhou:2008:HTE,
author = {Dong Zhou and Mark Truran and Tim Brailsford and Helen
Ashman},
title = {A Hybrid Technique for {English--Chinese} Cross
Language Information Retrieval},
journal = j-TALIP,
volume = {7},
number = {2},
pages = {5:1--5:??},
month = jun,
year = {2008},
CODEN = {????},
DOI = {http://doi.acm.org/10.1145/1362782.1362784},
ISSN = {1530-0226},
bibdate = {Mon Jun 16 17:12:23 MDT 2008},
bibsource = {http://portal.acm.org/},
abstract = {In this article we describe a hybrid technique for
dictionary-based query translation suitable for
English-Chinese cross language information retrieval.
This technique marries a graph-based model for the
resolution of candidate term ambiguity with a
pattern-based method for the translation of
out-of-vocabulary (OOV) terms. We evaluate the
performance of this hybrid technique in an experiment
using several NTCIR test collections. Experimental
results indicate a substantial increase in retrieval
effectiveness over various baseline systems
incorporating machine- and dictionary-based
translation.},
acknowledgement = ack-nhfb,
articleno = {5},
keywords = {cross language information retrieval; disambiguation;
graph-based analysis; patterns; unknown term
translation},
}
%%% TALIP volume 7, number 2 (June 2008), article 6.
@Article{Higashinaka:2008:AAC,
author = {Ryuichiro Higashinaka and Hideki Isozaki},
title = {Automatically Acquiring Causal Expression Patterns
from Relation-annotated Corpora to Improve Question
Answering for why-Questions},
journal = j-TALIP,
volume = {7},
number = {2},
pages = {6:1--6:??},
month = jun,
year = {2008},
CODEN = {????},
DOI = {http://doi.acm.org/10.1145/1362782.1362785},
ISSN = {1530-0226},
bibdate = {Mon Jun 16 17:12:23 MDT 2008},
bibsource = {http://portal.acm.org/},
abstract = {This article describes our approach for answering
why-questions that we initially introduced at NTCIR-6
QAC-4. The approach automatically acquires causal
expression patterns from relation-annotated corpora by
abstracting text spans annotated with a causal relation
and by mining syntactic patterns that are useful for
distinguishing sentences annotated with a causal
relation from those annotated with other relations. We
use these automatically acquired causal expression
patterns to create features to represent answer
candidates, and use these features together with other
possible features related to causality to train an
answer candidate ranker that maximizes the QA
performance with regards to the corpus of why-questions
and answers. NAZEQA, a Japanese why-QA system based on
our approach, clearly outperforms baselines with a Mean
Reciprocal Rank (top-5) of 0.223 when sentences are
used as answers and with a MRR (top-5) of 0.326 when
paragraphs are used as answers, making it presumably
the best-performing fully implemented why-QA system.
Experimental results also verified the usefulness of
the automatically acquired causal expression
patterns.},
acknowledgement = ack-nhfb,
articleno = {6},
keywords = {causal expression; pattern mining; question answering;
relation-annotated corpus},
}
%%% The math-mode F in the title is brace-protected so that styles
%%% which downcase titles do not turn it into a lowercase f.
@Article{Li:2008:ASV,
author = "Yaoyong Li and Kalina Bontcheva",
title = "Adapting Support Vector Machines for {$F$}-term-based
Classification of Patents",
journal = j-TALIP,
volume = "7",
number = "2",
pages = "7:1--7:??",
month = jun,
year = "2008",
CODEN = "????",
DOI = "http://doi.acm.org/10.1145/1362782.1362786",
ISSN = "1530-0226",
bibdate = "Mon Jun 16 17:12:23 MDT 2008",
bibsource = "http://portal.acm.org/",
abstract = "Support Vector Machines (SVM) have obtained
state-of-the-art results on many applications including
document classification. However, previous works on
applying SVMs to the $F$-term patent classification task
did not obtain as good results as other learning
algorithms such as k-NN. This is due to the fact that
$F$-term patent classification is different from
conventional document classification in several
aspects, mainly because it is a multiclass, multilabel
classification problem with semi-structured documents
and multi-faceted hierarchical categories.\par
This article describes our SVM-based system and several
techniques we developed successfully to adapt SVM for
the specific features of the $F$-term patent
classification task. We evaluate the techniques using
the NTCIR-6 $F$-term classification terms assigned to
Japanese patents. Moreover, our system participated in
the NTCIR-6 patent classification evaluation and
obtained the best results according to two of the three
metrics used for task performance evaluation. Following
the NTCIR-6 participation, we developed two new
techniques, which achieved even better scores using all
three NTCIR-6 metrics, effectively outperforming all
participating systems. This article presents this new
work and the experimental results that demonstrate the
benefits of the latest approach.",
acknowledgement = ack-nhfb,
articleno = "7",
keywords = "F-term classification; patent processing; support
vector machines",
}