The Centre for Speech Technology Research, The University of Edinburgh

Publications by Joanna Rownicka

s1569548.bib

@comment{Conference paper, ASRU 2017. The pdf and categories fields are non-standard annotations; standard bibliography styles ignore unknown fields.}
@inproceedings{rownicka17,
  author     = {Rownicka, Joanna and Renals, Steve and Bell, Peter},
  title      = {Simplifying very deep convolutional neural network architectures for robust speech recognition},
  booktitle  = {Proc. 2017 {IEEE} Automatic Speech Recognition and Understanding Workshop ({ASRU}), Okinawa, Japan},
  month      = dec,
  year       = {2017},
  pdf        = {http://www.cstr.inf.ed.ac.uk/downloads/publications/2017/rownicka_asru17.pdf},
  abstract   = {Very deep convolutional neural networks (VDCNNs) have been successfully used in computer vision. More recently VDCNNs have been applied to speech recognition, using architectures adopted from computer vision. In this paper, we experimentally analyse the role of the components in VDCNN architectures for robust speech recognition. We have proposed a number of simplified VDCNN architectures, taking into account the use of fully-connected layers and down-sampling approaches. We have investigated three ways to down-sample feature maps: max-pooling, average-pooling, and convolution with increased stride. Our proposed model consisting solely of convolutional (conv) layers, and without any fully-connected layers, achieves a lower word error rate on Aurora 4 compared to other VDCNN architectures typically used in speech recognition. We have also extended our experiments to the MGB-3 task of multi-genre broadcast recognition using BBC TV recordings. The MGB-3 results indicate that the same architecture achieves the best result among our VDCNNs on this task as well.},
  categories = {Robust Speech Recognition, Very Deep Convolutional Neural Networks, Aurora 4, MGB Challenge},
}