<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">inform</journal-id><journal-title-group><journal-title xml:lang="ru">Информатика</journal-title><trans-title-group xml:lang="en"><trans-title>Informatics</trans-title></trans-title-group></journal-title-group><issn pub-type="ppub">1816-0301</issn><issn pub-type="epub">2617-6963</issn><publisher><publisher-name>UIIP NASB</publisher-name></publisher></journal-meta><article-meta><article-id custom-type="elpub" pub-id-type="custom">inform-336</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>ОБРАБОТКА СИГНАЛОВ, ИЗОБРАЖЕНИЙ, РЕЧИ, ТЕКСТА И РАСПОЗНАВАНИЕ ОБРАЗОВ</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>SIGNAL, IMAGE, SPEECH, TEXT PROCESSING AND PATTERN RECOGNITION</subject></subj-group></article-categories><title-group><article-title>ПРИМЕНЕНИЕ МГНОВЕННОГО ГАРМОНИЧЕСКОГО АНАЛИЗА ДЛЯ АНТРОПОМОРФИЧЕСКОЙ ОБРАБОТКИ РЕЧЕВЫХ СИГНАЛОВ</article-title><trans-title-group xml:lang="en"><trans-title></trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Лихачев</surname><given-names>Д. С.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Азаров</surname><given-names>И. 
С.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Петровский</surname><given-names>А. А.</given-names></name></name-alternatives><xref ref-type="aff" rid="aff-1"/></contrib></contrib-group><aff xml:lang="ru" id="aff-1"><institution>Белорусский государственный университет информатики и радиоэлектроники</institution><country>Belarus</country></aff><pub-date pub-type="collection"><year>2011</year></pub-date><pub-date pub-type="epub"><day>05</day><month>04</month><year>2018</year></pub-date><volume>0</volume><issue>4(32)</issue><fpage>59</fpage><lpage>70</lpage><permissions><copyright-statement>Copyright © Лихачев Д.С., Азаров И.С., Петровский А.А., 2018</copyright-statement><copyright-year>2018</copyright-year><copyright-holder xml:lang="ru">Лихачев Д.С., Азаров И.С., Петровский А.А.</copyright-holder><copyright-holder xml:lang="en">Лихачев Д.С., Азаров И.С., Петровский А.А.</copyright-holder><license xml:lang="ru" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>Данная работа распространяется под лицензией Creative Commons Attribution 4.0.</license-p></license><license xml:lang="en" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://inf.grid.by/jour/article/view/336">https://inf.grid.by/jour/article/view/336</self-uri><abstract><p>Рассматривается способ параметрического описания звукового сигнала, основанный на антропоморфической интерпретации его частотных составляющих. Для получения параметров модели предлагается использовать мгновенный гармонический анализ вместо дискретного преобразования Фурье. 
В работе оценивается точность полученного описания. Приводятся экспериментальные результаты, показывающие, что реконструкция сигнала в большой степени зависит от средств получения частотно-временного описания, причем предложенный способ обеспечивает более высокое качество реконструкции сигнала по сравнению с известными методами.</p></abstract></article-meta></front><back><ref-list><title>References</title>
<ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Morgan, N. Does ASR have a PHD, or is it just piled higher and deeper? / N. Morgan [Electronic resource]. – Mode of access : http://superlectures.com/icassp2011/lecture.php?id=206&amp;lang=en. – Date of access : 21.10.2011.</mixed-citation><mixed-citation xml:lang="en">Morgan, N. Does ASR have a PHD, or is it just piled higher and deeper? / N. Morgan [Electronic resource]. – Mode of access : http://superlectures.com/icassp2011/lecture.php?id=206&amp;lang=en. – Date of access : 21.10.2011.</mixed-citation></citation-alternatives></ref>
<ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">A Perceptual Model for Sinusoidal Audio Coding Based on Spectral Integration / S. van de Par [et. al.] // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1292–1304.</mixed-citation><mixed-citation xml:lang="en">A Perceptual Model for Sinusoidal Audio Coding Based on Spectral Integration / S. van de Par [et. al.] // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1292–1304.</mixed-citation></citation-alternatives></ref>
<ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Ravindran, S. A Physiologically Inspired Method for Audio Classification / S. Ravindran, K. Chlemmer, D.V. Anderson // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1374–1381.</mixed-citation><mixed-citation xml:lang="en">Ravindran, S. A Physiologically Inspired Method for Audio Classification / S. Ravindran, K. Chlemmer, D.V. Anderson // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1374–1381.</mixed-citation></citation-alternatives></ref>
<ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Feldbauer, C. Anthropomorphic Coding of Speech and Audio: A Model Inversion Approach / C. Feldbauer, G. Kubin, W.B. Kleijn // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1334–1349.</mixed-citation><mixed-citation xml:lang="en">Feldbauer, C. Anthropomorphic Coding of Speech and Audio: A Model Inversion Approach / C. Feldbauer, G. Kubin, W.B. Kleijn // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1334–1349.</mixed-citation></citation-alternatives></ref>
<ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Ghitza, O. Auditory Models and Human Performance in Tasks Related to Speech Coding and Speech Recognition / O. Ghitza // IEEE Transactions on Speech and Audio Processing. – 1994. – Vol. 2, № 1. – P. 115–132.</mixed-citation><mixed-citation xml:lang="en">Ghitza, O. Auditory Models and Human Performance in Tasks Related to Speech Coding and Speech Recognition / O. Ghitza // IEEE Transactions on Speech and Audio Processing. – 1994. – Vol. 2, № 1. – P. 115–132.</mixed-citation></citation-alternatives></ref>
<ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Ivanov, A.V. Analysis of the IHC Adaptation for the Anthropomorphic Speech Processing Systems / A.V. Ivanov, A.A. Petrovsky // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1323–1333.</mixed-citation><mixed-citation xml:lang="en">Ivanov, A.V. Analysis of the IHC Adaptation for the Anthropomorphic Speech Processing Systems / A.V. Ivanov, A.A. Petrovsky // EURASIP Journal on Applied Signal Processing. – 2005. – Vol. 2005, № 9. – P. 1323–1333.</mixed-citation></citation-alternatives></ref>
<ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Лихачев, Д.С. Анализ и синтез устройств кодирования речевого сигнала на основе антропоморфической обработки и синусоидальных моделей / Д.С. Лихачев, А.А. Петровский // Доклады БГУИР. – 2006. – № 3 (15). – C. 35–43.</mixed-citation><mixed-citation xml:lang="en">Лихачев, Д.С. Анализ и синтез устройств кодирования речевого сигнала на основе антропоморфической обработки и синусоидальных моделей / Д.С. Лихачев, А.А. Петровский // Доклады БГУИР. – 2006. – № 3 (15). – C. 35–43.</mixed-citation></citation-alternatives></ref>
<ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Слуховая система / Я.А. Альтман [и др.] ; под общ. ред. Я.А. Альтмана. – Л. : Наука, – 620 с.</mixed-citation><mixed-citation xml:lang="en">Слуховая система / Я.А. Альтман [и др.] ; под общ. ред. Я.А. Альтмана. – Л. : Наука, – 620 с.</mixed-citation></citation-alternatives></ref>
<ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Likhachov, D.S. Improved auditory-based speech coding using psychoacoustic model based on a cochlear filter bank and an average localized synchrony detection / D.S. Likhachov, A.A. Petrovsky // Computer information systems and industrial management applications ; eds. K. Saeed, R. Mosdorf, Z. Sosnowski. – Poland : Bialystok, 2003. – P. 11–19.</mixed-citation><mixed-citation xml:lang="en">Likhachov, D.S. Improved auditory-based speech coding using psychoacoustic model based on a cochlear filter bank and an average localized synchrony detection / D.S. Likhachov, A.A. Petrovsky // Computer information systems and industrial management applications ; eds. K. Saeed, R. Mosdorf, Z. Sosnowski. – Poland : Bialystok, 2003. – P. 11–19.</mixed-citation></citation-alternatives></ref>
<ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Лихачев, Д.С. Компрессия речевого сигнала на основе синусоидальной модели с антропоморфической обработкой / Д.С. Лихачев, А.А. Петровский // Анализаторы речевых и звуковых сигналов: методы, алгоритмы и практика (с MATLAB-примерами) ; под ред. д.т.н. профессора А.А. Петровского. – Минск : Бестпринт, 2009. – С. 211–233.</mixed-citation><mixed-citation xml:lang="en">Лихачев, Д.С. Компрессия речевого сигнала на основе синусоидальной модели с антропоморфической обработкой / Д.С. Лихачев, А.А. Петровский // Анализаторы речевых и звуковых сигналов: методы, алгоритмы и практика (с MATLAB-примерами) ; под ред. д.т.н. профессора А.А. Петровского. – Минск : Бестпринт, 2009. – С. 211–233.</mixed-citation></citation-alternatives></ref>
<ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Азаров, И.С. Вычисление мгновенных гармонических параметров речевого сигнала / И.С. Азаров, А.А. Петровский // Речевые технологии. – 2008. – № 1 (1). – C. 67–77</mixed-citation><mixed-citation xml:lang="en">Азаров, И.С. Вычисление мгновенных гармонических параметров речевого сигнала / И.С. Азаров, А.А. Петровский // Речевые технологии. – 2008. – № 1 (1). – C. 67–77</mixed-citation></citation-alternatives></ref>
<ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Ghitza, O. Adequacy of auditory models to predict internal human representation of speech sounds / O. Ghitza // J. Acoust. Soc. Am. – 1993. – Vol. 93, № 4. – P. 2160–2171.</mixed-citation><mixed-citation xml:lang="en">Ghitza, O. Adequacy of auditory models to predict internal human representation of speech sounds / O. Ghitza // J. Acoust. Soc. Am. – 1993. – Vol. 93, № 4. – P. 2160–2171.</mixed-citation></citation-alternatives></ref>
<ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">An anthropomorphic speech processing based on the cochlear model and its application for coding task / A.A. Petrovsky [et al.] // International scientific journal of computing. – 2004. – Vol. 3, № 1. – P. 75–83.</mixed-citation><mixed-citation xml:lang="en">An anthropomorphic speech processing based on the cochlear model and its application for coding task / A.A. Petrovsky [et al.] // International scientific journal of computing. – 2004. – Vol. 3, № 1. – P. 75–83.</mixed-citation></citation-alternatives></ref>
<ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Wan, W.G. A two-dimentional non-linear cochlear model for speech processing: response to pure tones / W.G. Wan, A.A. Petrovsky, C.X. Fan // 6th Intern. Fase-Congress. – Zurich, Switzerland, 1992. – P. 233–236.</mixed-citation><mixed-citation xml:lang="en">Wan, W.G. A two-dimentional non-linear cochlear model for speech processing: response to pure tones / W.G. Wan, A.A. Petrovsky, C.X. Fan // 6th Intern. Fase-Congress. – Zurich, Switzerland, 1992. – P. 233–236.</mixed-citation></citation-alternatives></ref>
<ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Wan, W.G. A new solution for cochlear macromechanics / W.G. Wan, C.X. Fan // Acustica. – Vol. 75. – P. 79–82.</mixed-citation><mixed-citation xml:lang="en">Wan, W.G. A new solution for cochlear macromechanics / W.G. Wan, C.X. Fan // Acustica. – Vol. 75. – P. 79–82.</mixed-citation></citation-alternatives></ref>
<ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Greenwood, D.D. A cochlear frequency-position function for several species-29 years later / D.D. Greenwood // J. Acoust. Soc. Am. – 1990. – Vol. 87, № 6. – P. 2592–2605.</mixed-citation><mixed-citation xml:lang="en">Greenwood, D.D. A cochlear frequency-position function for several species-29 years later / D.D. Greenwood // J. Acoust. Soc. Am. – 1990. – Vol. 87, № 6. – P. 2592–2605.</mixed-citation></citation-alternatives></ref>
<ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Petrovsky, A.A. A digital cochlear model as a base of anthropomorphic speech processing / A.A. Petrovsky, D.S. Likhachov // Neural networks and artificial intelligence : proc. of the 3d Intern. Conf., Belarus, Minsk, November 12–14, 2003. – Minsk, 2003. – P. 126–131.</mixed-citation><mixed-citation xml:lang="en">Petrovsky, A.A. A digital cochlear model as a base of anthropomorphic speech processing / A.A. Petrovsky, D.S. Likhachov // Neural networks and artificial intelligence : proc. of the 3d Intern. Conf., Belarus, Minsk, November 12–14, 2003. – Minsk, 2003. – P. 126–131.</mixed-citation></citation-alternatives></ref>
<ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Лихачев, Д.С. Антропоморфический анализ на основе дискретного преобразования Фурье с неравномерной частотной шкалой / Д.С. Лихачев // Известия Белорусской инженерной академии. – 2005. – № 1 (19)/2. – С. 177–180.</mixed-citation><mixed-citation xml:lang="en">Лихачев, Д.С. Антропоморфический анализ на основе дискретного преобразования Фурье с неравномерной частотной шкалой / Д.С. Лихачев // Известия Белорусской инженерной академии. – 2005. – № 1 (19)/2. – С. 177–180.</mixed-citation></citation-alternatives></ref>
<ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">McAulay, R.J. Low-rate speech coding based on the sinusoidal model / R.J. McAulay, T.F. Quatieri // Advances in Speech Signal Processing ; eds. S. Furui, M.M. Sondhi. – N.Y. : Marcel Dekker, 1992. – P. 165–208.</mixed-citation><mixed-citation xml:lang="en">McAulay, R.J. Low-rate speech coding based on the sinusoidal model / R.J. McAulay, T.F. Quatieri // Advances in Speech Signal Processing ; eds. S. Furui, M.M. Sondhi. – N.Y. : Marcel Dekker, 1992. – P. 165–208.</mixed-citation></citation-alternatives></ref>
<ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">McAulay, R.J. Speech analysis/synthesis based on a sinusoidal representation / R.J. McAulay, T.F. Quatieri // IEEE Trans. on Acoust., Speech and Signal Processing. – 1986. – Vol. ASSP-34. – P. 744–754.</mixed-citation><mixed-citation xml:lang="en">McAulay, R.J. Speech analysis/synthesis based on a sinusoidal representation / R.J. McAulay, T.F. Quatieri // IEEE Trans. on Acoust., Speech and Signal Processing. – 1986. – Vol. ASSP-34. – P. 744–754.</mixed-citation></citation-alternatives></ref>
<ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Азаров, И.С. Непрерывное и дискретное гармонические преобразования для декомпозиции речевого сигнала на периодическую и шумовую компоненты / И.С. Азаров, А.А. Петровский // Доклады БГУИР. – 2008. – № 4 (34). – C. 92–105.</mixed-citation><mixed-citation xml:lang="en">Азаров, И.С. Непрерывное и дискретное гармонические преобразования для декомпозиции речевого сигнала на периодическую и шумовую компоненты / И.С. Азаров, А.А. Петровский // Доклады БГУИР. – 2008. – № 4 (34). – C. 92–105.</mixed-citation></citation-alternatives></ref>
<ref id="cit22"><label>22</label><citation-alternatives><mixed-citation xml:lang="ru">Petrovsky, A. Combining advanced sinusoidal and waveform matching models for parametric audio/speech coding / A. Petrovsky, E. Azarov, A. Petrovsky // EUSIPCO 2009 : proc. of the 17th European Signal Processing Conf. – Glasgow, 2009. – P. 436–440.</mixed-citation><mixed-citation xml:lang="en">Petrovsky, A. Combining advanced sinusoidal and waveform matching models for parametric audio/speech coding / A. Petrovsky, E. Azarov, A. Petrovsky // EUSIPCO 2009 : proc. of the 17th European Signal Processing Conf. – Glasgow, 2009. – P. 436–440.</mixed-citation></citation-alternatives></ref>
<ref id="cit23"><label>23</label><citation-alternatives><mixed-citation xml:lang="ru">ITU-T Recommendation P.862, PESQ an objective method for end-to-end speech quality assessment of narrowband telephone networks and speech codecs, February 2001.</mixed-citation><mixed-citation xml:lang="en">ITU-T Recommendation P.862, PESQ an objective method for end-to-end speech quality assessment of narrowband telephone networks and speech codecs, February 2001.</mixed-citation></citation-alternatives></ref>
<ref id="cit24"><label>24</label><citation-alternatives><mixed-citation xml:lang="ru">Yang, W. Enhanced Modified Bark Spectral Distortion (EMBSD): an Objective Speech Quality Measure Based on Audible Distortion and Cognition Model (PhD Thesis) / W. Yang [Electronic resource]. – Mode of access : http://www.temple.edu/speech_lab/wonhos_dissertation.pdf. – Date of access : 21.10.2011.</mixed-citation><mixed-citation xml:lang="en">Yang, W. Enhanced Modified Bark Spectral Distortion (EMBSD): an Objective Speech Quality Measure Based on Audible Distortion and Cognition Model (PhD Thesis) / W. Yang [Electronic resource]. – Mode of access : http://www.temple.edu/speech_lab/wonhos_dissertation.pdf. – Date of access : 21.10.2011.</mixed-citation></citation-alternatives></ref>
</ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
