<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">inform</journal-id><journal-title-group><journal-title xml:lang="ru">Информатика</journal-title><trans-title-group xml:lang="en"><trans-title>Informatics</trans-title></trans-title-group></journal-title-group><issn pub-type="ppub">1816-0301</issn><issn pub-type="epub">2617-6963</issn><publisher><publisher-name>UIIP NASB</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.37661/1816-0301-2020-17-2-44-53</article-id><article-id custom-type="elpub" pub-id-type="custom">inform-1068</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>БИОИНФОРМАТИКА</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>BIOINFORMATICS</subject></subj-group></article-categories><title-group><article-title>Предсказание структуры гомодимерных белковых комплексов на основе глубокой нейронной сети</article-title><trans-title-group xml:lang="en"><trans-title>Protein homodimers structure prediction based on deep neural network</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Хадарович</surname><given-names>А. Ю.</given-names></name><name name-style="western" xml:lang="en"><surname>Hadarovich</surname><given-names>A. Y.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Хадарович Анна Юрьевна, научный сотрудник</p><p>Минск</p></bio><bio xml:lang="en"><p>Anna Y. 
Hadarovich, Researcher</p><p>Minsk</p></bio><email xlink:type="simple">ahadarovich@gmail.com</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Калиновский</surname><given-names>А. А.</given-names></name><name name-style="western" xml:lang="en"><surname>Kalinouski</surname><given-names>A. A.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Калиновский Александр Александрович, научный сотрудник</p><p>Минск</p></bio><bio xml:lang="en"><p>Alexander A. Kalinouski, Researcher</p><p>Minsk</p></bio><email xlink:type="simple">gakarak@gmail.com</email><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Тузиков</surname><given-names>А. В.</given-names></name><name name-style="western" xml:lang="en"><surname>Tuzikov</surname><given-names>A. V.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Тузиков Александр Васильевич, член-корреспондент, доктор физико-математических наук, профессор, генеральный директор</p><p>Минск</p></bio><bio xml:lang="en"><p>Alexander V. Tuzikov, Corresponding Member, Dr. Sci. 
(Phys.-Math.), Professor, General Director</p><p>Minsk</p></bio><email xlink:type="simple">tuzikov@newman.bas-net.by</email><xref ref-type="aff" rid="aff-2"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru"><institution>Объединенный институт проблем информатики Национальной академии наук Беларуси; Белорусский государственный университет</institution></aff><aff xml:lang="en"><institution>The United Institute of Informatics Problems of the National Academy of Sciences of Belarus; Belarusian State University</institution></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru"><institution>Объединенный институт проблем информатики Национальной академии наук Беларуси</institution></aff><aff xml:lang="en"><institution>The United Institute of Informatics Problems of the National Academy of Sciences of Belarus</institution></aff></aff-alternatives><pub-date pub-type="collection"><year>2020</year></pub-date><pub-date pub-type="epub"><day>07</day><month>05</month><year>2020</year></pub-date><volume>17</volume><issue>2</issue><fpage>44</fpage><lpage>53</lpage><permissions><copyright-statement>Copyright © Хадарович А.Ю., Калиновский А.А., Тузиков А.В., 2020</copyright-statement><copyright-year>2020</copyright-year><copyright-holder xml:lang="ru">Хадарович А.Ю., Калиновский А.А., Тузиков А.В.</copyright-holder><copyright-holder xml:lang="en">Hadarovich A.Y., Kalinouski A.A., Tuzikov A.V.</copyright-holder><license xml:lang="ru" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>Данная работа распространяется под лицензией Creative Commons Attribution 4.0.</license-p></license><license xml:lang="en" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri 
xlink:href="https://inf.grid.by/jour/article/view/1068">https://inf.grid.by/jour/article/view/1068</self-uri><abstract><p>Предсказание структур белковых комплексов имеет важные приложения в таких областях, как моделирование биологических процессов и разработка лекарственных средств. Гомодимеры (комплексы, состоящие из двух одинаковых белков) являются наиболее распространенным типом белковых комплексов в природе, но до сих пор нет универсального алгоритма для предсказания их трехмерных структур. Экспериментальные методы для определения структур белковых комплексов требуют значительных затрат времени и ресурсов и имеют свои ограничения. Глубокие нейронные сети позволили предсказать структуры отдельных белков, значительно превзойдя по точности другие алгоритмические подходы. Опираясь на идею этого подхода, в статье разработан алгоритм для моделирования трехмерной структуры гомодимерных комплексов на основе глубокого обучения. Он состоит из двух основных этапов: на первом этапе прогнозируется карта контактов белкового комплекса при помощи глубокой сверточной нейронной сети, а на втором предсказывается трехмерная структура гомодимера на основе полученной карты контактов и процедуры оптимизации. Предложенный подход был протестирован и проверен на наборе белковых гомодимеров из базы данных белков PDB (Protein Data Bank). Разработанная методика может быть использована для оценки моделей белковых гомодимеров в качестве одного из этапов разработки лекарственных соединений.</p></abstract><trans-abstract xml:lang="en"><p>Structure prediction of protein-protein complexes has important applications in such domains as modeling of biological processes and drug design. Homodimers (complexes consisting of two identical proteins) are the most common type of protein complex in nature, but there is still no universal algorithm for predicting their 3D structures. 
Experimental techniques for determining the structure of a protein complex require an enormous amount of time and resources, and each method has its own limitations. Recently, deep neural networks have made it possible to predict the structures of individual proteins, greatly surpassing other algorithmic approaches in accuracy. Building on the idea of this approach, we developed a deep-learning-based algorithm to model the 3D structure of a homodimer. It consists of two major steps: in the first step, a contact map of the protein complex is predicted with a deep convolutional neural network, and in the second step, the 3D structure of the homodimer is predicted from the obtained contact map using an optimization procedure. The use of the neural network in combination with an optimization procedure based on the gradient descent method made it possible to predict structures of protein homodimers. The suggested approach was tested and validated on a dataset of protein homodimers from the Protein Data Bank (PDB). The developed procedure could also be used for evaluating protein homodimer models as one of the stages of drug compound development.</p></trans-abstract><kwd-group xml:lang="ru"><kwd>методы глубокого обучения</kwd><kwd>нейронные сети</kwd><kwd>моделирование белковых комплексов</kwd><kwd>гомодимеры</kwd><kwd>оптимизация</kwd></kwd-group><kwd-group xml:lang="en"><kwd>deep learning</kwd><kwd>neural network</kwd><kwd>modeling of protein complexes</kwd><kwd>homodimers</kwd><kwd>optimization</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Anfinsen C. B. Principles that govern the folding of protein chains. Science, 1973, vol. 181 (4096), pp. 223–230. https://doi.org/10.1126/science.181.4096.223</mixed-citation><mixed-citation xml:lang="en">Anfinsen C. B. Principles that govern the folding of protein chains. Science, 1973, vol. 181 (4096), pp. 223–230. 
https://doi.org/10.1126/science.181.4096.223</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Lecun Y., Bengio Y., Hinton G. Deep learning. Nature, 2015, vol. 521 (7553), pp. 436–444. https://doi.org/10.1038/nature14539</mixed-citation><mixed-citation xml:lang="en">Lecun Y., Bengio Y., Hinton G. Deep learning. Nature, 2015, vol. 521 (7553), pp. 436–444. https://doi.org/10.1038/nature14539</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Senior A. W., Evans R., Jumper J., Kirkpatrick J., Sifre L., …, Hassabis D. Improved protein structure prediction using potentials from deep learning. Nature, 2020, vol. 577 (7792), pp. 706–710. https://doi.org/10.1038/s41586-019-1923-7</mixed-citation><mixed-citation xml:lang="en">Senior A. W., Evans R., Jumper J., Kirkpatrick J., Sifre L., …, Hassabis D. Improved protein structure prediction using potentials from deep learning. Nature, 2020, vol. 577 (7792), pp. 706–710. https://doi.org/10.1038/s41586-019-1923-7</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Billings W. M., Hedelius B., Millecam T., Wingate D., Corte D. D. ProSPr: democratized implementation of alphafold protein distance prediction network. Biorxiv, 2019, p. 830273. https://doi.org/10.1101/830273</mixed-citation><mixed-citation xml:lang="en">Billings W. M., Hedelius B., Millecam T., Wingate D., Corte D. D. ProSPr: democratized implementation of alphafold protein distance prediction network. Biorxiv, 2019, p. 830273. https://doi.org/10.1101/830273</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Kryshtafovych A., Schwede T., Topf M., Fidelis K., Moult J. Critical assessment of methods of protein structure prediction (CASP) – Round XIII. 
Proteins: Structure, Function, and Bioinformatics, 2019, vol. 87 (12), pp. 1011–1020. https://doi.org/10.1002/prot.25823</mixed-citation><mixed-citation xml:lang="en">Kryshtafovych A., Schwede T., Topf M., Fidelis K., Moult J. Critical assessment of methods of protein structure prediction (CASP) – Round XIII. Proteins: Structure, Function, and Bioinformatics, 2019, vol. 87 (12), pp. 1011–1020. https://doi.org/10.1002/prot.25823</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Jones D. T., Kandathil S. M. High precision in protein contact prediction using fully convolutional neural networks and minimal sequence features. Bioinformatics, 2018, vol. 34, pp. 3308–3315.</mixed-citation><mixed-citation xml:lang="en">Jones D. T., Kandathil S. M. High precision in protein contact prediction using fully convolutional neural networks and minimal sequence features. Bioinformatics, 2018, vol. 34, pp. 3308–3315.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Seemayer S., Gruber M., Söding J. CCMpred – fast and precise prediction of protein residue-residue contacts from correlated mutations. Bioinformatics, 2014, vol. 30 (21), pp. 3128–3130.</mixed-citation><mixed-citation xml:lang="en">Seemayer S., Gruber M., Söding J. CCMpred – fast and precise prediction of protein residue-residue contacts from correlated mutations. Bioinformatics, 2014, vol. 30 (21), pp. 3128–3130.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Jones D. T., Singh T., Kosciolek T., Tetchner S. MetaPSICOV: combining coevolution methods for accurate prediction of contacts and long range hydrogen bonding in proteins. Bioinformatics, 2015, vol. 31 (7), pp. 999–1006.</mixed-citation><mixed-citation xml:lang="en">Jones D. T., Singh T., Kosciolek T., Tetchner S. 
MetaPSICOV: combining coevolution methods for accurate prediction of contacts and long range hydrogen bonding in proteins. Bioinformatics, 2015, vol. 31 (7), pp. 999–1006.</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Jiang Q., Jin X., Lee S.-J., Yao S. Protein secondary structure prediction: a survey of the state of the art. Journal of Molecular Graphics and Modelling, 2017, vol. 76, pp. 379–402. https://doi.org/10.1016/j.jmgm.2017.07.015</mixed-citation><mixed-citation xml:lang="en">Jiang Q., Jin X., Lee S.-J., Yao S. Protein secondary structure prediction: a survey of the state of the art. Journal of Molecular Graphics and Modelling, 2017, vol. 76, pp. 379–402. https://doi.org/10.1016/j.jmgm.2017.07.015</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Skwark M. J., Raimondi D., Michel M., Elofsson A. Improved contact predictions using the recognition of protein like contact patterns. PLoS Computational Biology, 2014, vol. 10 (11), p. e1003889. https://doi.org/10.1371/journal.pcbi.1003889</mixed-citation><mixed-citation xml:lang="en">Skwark M. J., Raimondi D., Michel M., Elofsson A. Improved contact predictions using the recognition of protein like contact patterns. PLoS Computational Biology, 2014, vol. 10 (11), p. e1003889. https://doi.org/10.1371/journal.pcbi.1003889</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Berman H. M. The protein data bank: a historical perspective. Acta Crystallographica Section A: Foundations of Crystallography, 2008, vol. 64 (1), pp. 88–95. https://doi.org/10.1107/S0108767307035623</mixed-citation><mixed-citation xml:lang="en">Berman H. M. The protein data bank: a historical perspective. Acta Crystallographica Section A: Foundations of Crystallography, 2008, vol. 64 (1), pp. 88–95. 
https://doi.org/10.1107/S0108767307035623</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Mou Y., Huang P.-S., Hsu F.-C., Huang S.-J., Mayo S. L. Computational design and experimental verification of a symmetric protein homodimer. Proceedings of the National Academy of Sciences of the United States of America, 2015, vol. 112 (34), pp. 10714–10719. https://doi.org/10.1073/pnas.1505072112</mixed-citation><mixed-citation xml:lang="en">Mou Y., Huang P.-S., Hsu F.-C., Huang S.-J., Mayo S. L. Computational design and experimental verification of a symmetric protein homodimer. Proceedings of the National Academy of Sciences of the United States of America, 2015, vol. 112 (34), pp. 10714–10719. https://doi.org/10.1073/pnas.1505072112</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Long J., Shelhamer E., Darrell T. Fully convolutional networks for semantic segmentation. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, Boston, Massachusetts, USA, 7–12 June 2015. Boston, 2015, pp. 3431–3440. https://doi.org/10.1109/CVPR.2015.7298965</mixed-citation><mixed-citation xml:lang="en">Long J., Shelhamer E., Darrell T. Fully convolutional networks for semantic segmentation. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, Boston, Massachusetts, USA, 7–12 June 2015. Boston, 2015, pp. 3431–3440. https://doi.org/10.1109/CVPR.2015.7298965</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Fu J., Liu J., Tian H., Li Y., Bao Y., …, Lu H. Dual attention network for scene segmentation. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, Long Beach, California, USA, 15–20 June 2019. Long Beach, 2019, pp. 
3141–3149. Available at: http://arxiv.org/abs/1809.02983 (accessed 27.03.2020).</mixed-citation><mixed-citation xml:lang="en">Fu J., Liu J., Tian H., Li Y., Bao Y., …, Lu H. Dual attention network for scene segmentation. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, Long Beach, California, USA, 15–20 June 2019. Long Beach, 2019, pp. 3141–3149. Available at: http://arxiv.org/abs/1809.02983 (accessed 27.03.2020).</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">He K., Zhang X., Ren S., Sun J. Deep residual learning for image recognition. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, Las Vegas, Nevada, USA, 27–30 June 2016. Las Vegas, 2016, pp. 770–778. https://doi.org/10.1109/CVPR.2016.90</mixed-citation><mixed-citation xml:lang="en">He K., Zhang X., Ren S., Sun J. Deep residual learning for image recognition. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, Las Vegas, Nevada, USA, 27–30 June 2016. Las Vegas, 2016, pp. 770–778. https://doi.org/10.1109/CVPR.2016.90</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Chen L.-C., Papandreou G., Kokkinos I., Murphy K., Yuille A. L. DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2018, vol. 40 (4), pp. 834–848. https://doi.org/10.1109/TPAMI.2017.2699184</mixed-citation><mixed-citation xml:lang="en">Chen L.-C., Papandreou G., Kokkinos I., Murphy K., Yuille A. L. DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2018, vol. 40 (4), pp. 834–848. 
https://doi.org/10.1109/TPAMI.2017.2699184</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Kingma D. P., Ba J. Adam: a method for stochastic optimization. 3rd International Conference on Learning Representations (ICLR 2015), San Diego, CA, USA, 7–9 May 2015. San Diego, 2015. Available at: https://arxiv.org/abs/1412.6980 (accessed 27.03.2020).</mixed-citation><mixed-citation xml:lang="en">Kingma D. P., Ba J. Adam: a method for stochastic optimization. 3rd International Conference on Learning Representations (ICLR 2015), San Diego, CA, USA, 7–9 May 2015. San Diego, 2015. Available at: https://arxiv.org/abs/1412.6980 (accessed 27.03.2020).</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Mitternacht S. FreeSASA: an open source C library for solvent accessible surface area calculations. F1000Research, 2016, vol. 5, p. 189. https://doi.org/10.12688/f1000research.7931.1</mixed-citation><mixed-citation xml:lang="en">Mitternacht S. FreeSASA: an open source C library for solvent accessible surface area calculations. F1000Research, 2016, vol. 5, p. 189. https://doi.org/10.12688/f1000research.7931.1</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Janin J., Bahadur R. P., Chakrabarti P. Protein-protein interaction and quaternary structure. Quarterly Reviews of Biophysics, 2008, vol. 41 (2), pp. 133–180. https://doi.org/10.1017/S0033583508004708</mixed-citation><mixed-citation xml:lang="en">Janin J., Bahadur R. P., Chakrabarti P. Protein-protein interaction and quaternary structure. Quarterly Reviews of Biophysics, 2008, vol. 41 (2), pp. 133–180. https://doi.org/10.1017/S0033583508004708</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Cock P. 
J., Antao T., Chang J. T., Chapman B. A., Cox C. J., …, de Hoon M. J. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics, 2009, vol. 25 (11), pp. 1422–1423. https://doi.org/10.1093/bioinformatics/btp163</mixed-citation><mixed-citation xml:lang="en">Cock P. J., Antao T., Chang J. T., Chapman B. A., Cox C. J., …, de Hoon M. J. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics, 2009, vol. 25 (11), pp. 1422–1423. https://doi.org/10.1093/bioinformatics/btp163</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
