The Centre for Speech Technology Research, The University of Edinburgh

Publications by Michael Berger

s0788136.bib

@misc{Carnival_SIGGRAPH_2010,
  author = {Berger, Michael and Hofer, Gregor and Shimodaira, Hiroshi},
  title = {Carnival: a modular framework for automated facial animation},
  year = {2010},
  note = {Bronze award winner, ACM Student Research Competition},
  address = {Los Angeles, Calif., USA},
  howpublished = {Poster at SIGGRAPH 2010},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/carnival.pdf},
  abstract = {We present a software framework for speech- or text-driven animation--including a platform-independent API and an application implementing it--which unifies state-of-the-art speech technology and graphics technology within a single system.}
}
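
The abstract above describes a modular pipeline in which a speech- or text-processing front end drives an animation back end through a shared interface. The following Python sketch is purely illustrative and is not Carnival's actual API; all class and method names are hypothetical, meant only to show how such a decomposition might look.

from abc import ABC, abstractmethod


class SpeechInput(ABC):
    """Hypothetical front end: yields timed phone labels for an utterance."""

    @abstractmethod
    def phone_segments(self, utterance):
        """Return a list of (phone, start_time, end_time) tuples."""
        ...


class AnimationBackend(ABC):
    """Hypothetical back end: turns timed phone labels into facial animation."""

    @abstractmethod
    def render(self, phone_segments, frame_rate=25):
        """Return per-frame facial animation parameters."""
        ...


def animate(utterance, frontend, backend):
    # Any front end can drive any back end through the shared interface.
    return backend.render(frontend.phone_segments(utterance))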
@inproceedings{felps_interspeech2010,
  author = {Felps, Daniel and Geng, Christian and Berger, Michael and Richmond, Korin and Gutierrez-Osuna, Ricardo},
  title = {Relying on critical articulators to estimate vocal tract spectra in an articulatory-acoustic database},
  booktitle = {Proc. Interspeech},
  abstract = {We present a new phone-dependent feature weighting scheme that can be used to map articulatory configurations (e.g. EMA) onto vocal tract spectra (e.g. MFCC) through table lookup. The approach consists of assigning feature weights according to a feature's ability to predict the acoustic distance between frames. Since an articulator's predictive accuracy is phone-dependent (e.g., lip location is a better predictor for bilabial sounds than for palatal sounds), a unique weight vector is found for each phone. Inspection of the weights reveals a correspondence with the expected critical articulators for many phones. The proposed method reduces overall cepstral error by 6\% when compared to a uniform weighting scheme. Vowels show the greatest benefit, though improvements occur for 80\% of the tested phones.},
  month = {September},
  year = {2010},
  keywords = {speech production, speech synthesis},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/IS100076.pdf},
  pages = {1990--1993}
}
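
The core operation described in this abstract is a table lookup that matches an incoming articulatory frame (e.g. EMA) against a stored database using a phone-dependent feature weight vector, then returns the acoustic frame (e.g. MFCC) of the best match. The sketch below illustrates only that lookup step under assumed array shapes; the weight-estimation procedure from the paper is not reproduced.

import numpy as np


def lookup_mfcc(ema_frame, phone, db_ema, db_mfcc, phone_weights):
    """Return the MFCC frame of the closest stored articulatory frame.

    ema_frame     : (D,) articulatory feature vector to map
    phone         : phone label used to select the weight vector
    db_ema        : (N, D) stored articulatory frames
    db_mfcc       : (N, M) corresponding acoustic (MFCC) frames
    phone_weights : dict mapping phone -> (D,) non-negative weight vector
    """
    w = phone_weights[phone]                     # phone-dependent weights
    diff = db_ema - ema_frame                    # (N, D) frame differences
    dist = np.sqrt((w * diff ** 2).sum(axis=1))  # weighted Euclidean distance
    return db_mfcc[np.argmin(dist)]              # acoustic frame of best match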
@article{McGowanBerger2009,
  author = {McGowan, Richard S. and Berger, Michael A.},
  title = {Acoustic-articulatory mapping in vowels by locally weighted regression},
  journal = {Journal of the Acoustical Society of America},
  number = {4},
  abstract = {A method for mapping between simultaneously measured articulatory and acoustic data is proposed. The method uses principal components analysis on the articulatory and acoustic variables, and mapping between the domains by locally weighted linear regression, or loess [Cleveland, W. S. (1979) J. Am. Stat. Assoc. 74, 829--836]. The latter method permits local variation in the slopes of the linear regression, assuming that the function being approximated is smooth. The methodology is applied to vowels of four speakers in the Wisconsin X-ray Microbeam Speech Production Database, with formant analysis. Results are examined in terms of (1) examples of forward (articulation-to-acoustics) mappings and inverse mappings, (2) distributions of local slopes and constants, (3) examples of correlations among slopes and constants, (4) root-mean-square error, and (5) sensitivity of formant frequencies to articulatory change. It is shown that the results are qualitatively correct and that loess performs better than global regression. The forward mappings show different root-mean-square error properties than the inverse mappings indicating that this method is better suited for the forward mappings than the inverse mappings, at least for the data chosen for the current study. Some preliminary results on sensitivity of the first two formant frequencies to the two most important articulatory principal components are presented.},
  volume = {126},
  year = {2009},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2009/aam.pdf},
  pages = {2011--2032},
  categories = {Articulatory inversion, locally weighted regression, X-ray microbeam, formant analysis}
}
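
The mapping method in this abstract is locally weighted linear regression (loess): for each query point, nearby training points are weighted by a kernel and a local linear model is fitted. The sketch below is a minimal loess-style predictor, assuming the data have already been reduced to articulatory principal components X with a scalar acoustic target y (e.g. a formant frequency); the tricube kernel and nearest-neighbour span follow Cleveland (1979).

import numpy as np


def loess_predict(x_query, X, y, span=0.3):
    """Fit a weighted linear model around x_query and return its prediction."""
    n = len(X)
    k = max(int(span * n), X.shape[1] + 1)       # neighbourhood size
    d = np.linalg.norm(X - x_query, axis=1)      # distances to the query point
    idx = np.argsort(d)[:k]                      # k nearest neighbours
    u = d[idx] / d[idx].max()                    # scaled distances in [0, 1]
    w = (1 - u ** 3) ** 3                        # tricube weights
    A = np.hstack([np.ones((k, 1)), X[idx]])     # local design matrix
    sw = np.sqrt(w)                              # weighted least squares
    beta, *_ = np.linalg.lstsq(A * sw[:, None], y[idx] * sw, rcond=None)
    return np.concatenate([[1.0], x_query]) @ beta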
@misc{Hofer_Berger:sigg2010,
  author = {Hofer, Gregor and Richmond, Korin and Berger, Michael},
  howpublished = {Poster at SIGGRAPH 2010},
  pdf = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2010/lipsync-sig10.pdf},
  year = {2010},
  title = {Lip Synchronization by Acoustic Inversion},
  address = {Los Angeles, USA}
}
@article{10.1109/MCG.2011.71,
  author = {Berger, Michael A. and Hofer, Gregor and Shimodaira, Hiroshi},
  publisher = {IEEE Computer Society},
  doi = {10.1109/MCG.2011.71},
  title = {Carnival -- Combining Speech Technology and Computer Animation},
  journal = {IEEE Computer Graphics and Applications},
  issn = {0272-1716},
  volume = {31},
  year = {2011},
  pages = {80--89},
  address = {Los Alamitos, CA, USA}
}