<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">inform</journal-id><journal-title-group><journal-title xml:lang="ru">Информатика</journal-title><trans-title-group xml:lang="en"><trans-title>Informatics</trans-title></trans-title-group></journal-title-group><issn pub-type="ppub">1816-0301</issn><issn pub-type="epub">2617-6963</issn><publisher><publisher-name>UIIP NASB</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.37661/1816-0301-2025-22-2-33-47</article-id><article-id custom-type="elpub" pub-id-type="custom">inform-1351</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>ОБРАБОТКА СИГНАЛОВ, ИЗОБРАЖЕНИЙ, РЕЧИ, ТЕКСТА И РАСПОЗНАВАНИЕ ОБРАЗОВ</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>SIGNAL, IMAGE, SPEECH, TEXT PROCESSING AND PATTERN RECOGNITION</subject></subj-group></article-categories><title-group><article-title>Эффективное обнаружение зданий на изображениях дистанционного зондирования на основе улучшенной сети YOLOv10</article-title><trans-title-group xml:lang="en"><trans-title>Efficient detection of building in remote sensing images using an improved YOLOv10 network</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0003-6976-5386</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Ву</surname><given-names>С.</given-names></name><name name-style="western" xml:lang="en"><surname>Wu</surname><given-names>X.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Ву 
Сяньи, аспирант механико-математического факультета</p><p>пр. Независимости, 4, Минск, 220030</p></bio><bio xml:lang="en"><p>Xianyi Wu, Postgraduate Student of the Faculty of Mechanics and Mathematics</p><p>av. Nezavisimosti, 4, Minsk, 220030</p></bio><email xlink:type="simple">tigerv5872@gmail.com</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Абламейко</surname><given-names>С. В.</given-names></name><name name-style="western" xml:lang="en"><surname>Ablameyko</surname><given-names>Se. V.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Абламейко Сергей Владимирович, академик НАН Беларуси, доктор технических наук, профессор механико-математического факультета</p><p>ул. Сурганова, 6, Минск, 220012</p></bio><bio xml:lang="en"><p>Sergey V. Ablameyko, Acad. of the National Academy of Sciences of Belarus, D. Sc. (Eng.), Prof. of the Faculty of Mechanics and Mathematics</p><p>st. 
Surganova, 6, Minsk, 220012</p></bio><email xlink:type="simple">ablameyko@bsu.by</email><xref ref-type="aff" rid="aff-2"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru"><institution>Белорусский государственный университет</institution></aff><aff xml:lang="en"><institution>Belarusian State University</institution></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru"><institution>Объединенный институт проблем информатики Национальной академии наук Беларуси</institution></aff><aff xml:lang="en"><institution>The United Institute of Informatics Problems of the National Academy of Sciences of Belarus</institution></aff></aff-alternatives><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>10</day><month>07</month><year>2025</year></pub-date><volume>22</volume><issue>2</issue><fpage>33</fpage><lpage>47</lpage><permissions><copyright-statement>Copyright © Ву С., Абламейко С.В., 2025</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="ru">Ву С., Абламейко С.В.</copyright-holder><copyright-holder xml:lang="en">Wu X., Ablameyko S.V.</copyright-holder><license xml:lang="ru" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>Данная работа распространяется под лицензией Creative Commons Attribution 4.0.</license-p></license><license xml:lang="en" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://inf.grid.by/jour/article/view/1351">https://inf.grid.by/jour/article/view/1351</self-uri><abstract><p>Цели. 
В настоящее время быстрое определение местоположения и размера объектов зданий с помощью изображений дистанционного зондирования имеет важное научно-исследовательское и практическое значение для городского планирования, мониторинга окружающей среды и управления стихийными бедствиями. Методы. Предлагается метод обнаружения объектов на основе улучшенной сети YOLOv10, которая включает в себя механизм внимания Супертокен, модель RepConv (повторно параметризуемая свертка) и нормализованное взвешенное расстояние для более точного обнаружения зданий на изображениях дистанционного зондирования. Метод повышает точность и эффективность обнаружения, особенно для небольших объектов. Набор данных LEVIR-CD используется для обучения и тестирования модели. Результаты. Экспериментальные результаты показывают, что предлагаемый метод демонстрирует лучшую точность при решении задачи обнаружения зданий, чем традиционный YOLOv10 и другие методы. Заключение. Предлагаемый метод эффективно повышает точность и эффективность обнаружения зданий на изображениях дистанционного зондирования.</p></abstract><trans-abstract xml:lang="en"><p>Objectives. At present, rapid detection of the location and size of building objects from remote sensing images has important scientific research value and practical significance for urban planning, environmental monitoring and disaster management. Methods. This paper proposes an object detection method based on an improved YOLOv10 network, which incorporates Super Token Attention, RepConv and Normalized Weighted Distance to more precisely detect buildings in remote sensing images. This method improves the detection accuracy and efficiency, especially for small objects. The LEVIR-CD dataset is used for model training and testing. Results. The experimental results show that the method demonstrates better accuracy on the building detection task than the traditional YOLOv10 and other methods. Conclusion. 
The proposed method significantly enhances the accuracy and efficiency of building detection in remote sensing images.</p></trans-abstract><kwd-group xml:lang="ru"><kwd>YOLOv10</kwd><kwd>изображения дистанционного зондирования</kwd><kwd>механизм внимания</kwd><kwd>обнаружение зданий</kwd><kwd>RepConv</kwd><kwd>Super Token Attention</kwd></kwd-group><kwd-group xml:lang="en"><kwd>YOLOv10</kwd><kwd>remote sensing images</kwd><kwd>attention mechanism</kwd><kwd>building detection</kwd><kwd>RepConv</kwd><kwd>Super Token Attention</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Li S. T., Li C. Y., Kang X. D. Current status and future prospects of multi-source remote sensing image fusion. National Remote Sensing Bulletin, 2021, vol. 25, no. 1, pp. 148–166. DOI: 10.11834/jrs.20210259.</mixed-citation><mixed-citation xml:lang="en">Li S. T., Li C. Y., Kang X. D. Current status and future prospects of multi-source remote sensing image fusion. National Remote Sensing Bulletin, 2021, vol. 25, no. 1, pp. 148–166. DOI: 10.11834/jrs.20210259.</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Luo H. L., Wang W. X., Ye X. Y., Zhu S. X., Bai Y. Q. Research progress on directed object detection based on deep learning. Image and Signal Processing, 2024, vol. 13, no. 3, pp. 258–270. DOI: 10.12677/jisp.2024.133022.</mixed-citation><mixed-citation xml:lang="en">Luo H. L., Wang W. X., Ye X. Y., Zhu S. X., Bai Y. Q. Research progress on directed object detection based on deep learning. Image and Signal Processing, 2024, vol. 13, no. 3, pp. 258–270. DOI: 10.12677/jisp.2024.133022.</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Abdikan S., Bilgin G., Sanli F. B., Uslu E., Ustuner M. 
Enhancing land use classification with fusing dual-polarized TerraSAR-X and multispectral RapidEye data. Journal of Applied Remote Sensing, 2015, vol. 9, no. 1, p. 096054. DOI: 10.1117/1.JRS.9.096054.</mixed-citation><mixed-citation xml:lang="en">Abdikan S., Bilgin G., Sanli F. B., Uslu E., Ustuner M. Enhancing land use classification with fusing dual-polarized TerraSAR-X and multispectral RapidEye data. Journal of Applied Remote Sensing, 2015, vol. 9, no. 1, p. 096054. DOI: 10.1117/1.JRS.9.096054.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Liu F. F., Zhu C. M., Zhao N. N., Wu J. H. Remote sensing small target detection based on multimodal fusion. Laser &amp; Optoelectronics Progress, 2024, vol. 61, no. 24, p. 2428010. DOI: 10.3788/LOP241203.</mixed-citation><mixed-citation xml:lang="en">Liu F. F., Zhu C. M., Zhao N. N., Wu J. H. Remote sensing small target detection based on multimodal fusion. Laser &amp; Optoelectronics Progress, 2024, vol. 61, no. 24, p. 2428010. DOI: 10.3788/LOP241203.</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Li J., Wei X. M. Research on efficient detection network method for remote sensing images based on self-attention mechanism. Image and Vision Computing, 2024, vol. 142, p. 104884. DOI: 10.1016/j.imavis.2023.104884.</mixed-citation><mixed-citation xml:lang="en">Li J., Wei X. M. Research on efficient detection network method for remote sensing images based on self-attention mechanism. Image and Vision Computing, 2024, vol. 142, p. 104884. DOI: 10.1016/j.imavis.2023.104884.</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Liu D., Zhong L., Wu H., Li S., Li Y. Remote sensing image super-resolution reconstruction by fusing multi-scale receptive fields and hybrid transformer. 
Scientific Reports, 2025, vol. 15, p. 2140. DOI: 10.1038/s41598-025-86446-5.</mixed-citation><mixed-citation xml:lang="en">Liu D., Zhong L., Wu H., Li S., Li Y. Remote sensing image super-resolution reconstruction by fusing multi-scale receptive fields and hybrid transformer. Scientific Reports, 2025, vol. 15, p. 2140. DOI: 10.1038/s41598-025-86446-5.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Li Z., Wang H., Ma G., Yang W., Ablameyko S. Effective small object detection in remote sensing images based on improved YOLOv8 network. Nonlinear Phenomena in Complex Systems, 2024, vol. 27, no. 3, pp. 278–291. DOI: 10.5281/zenodo.13960639.</mixed-citation><mixed-citation xml:lang="en">Li Z., Wang H., Ma G., Yang W., Ablameyko S. Effective small object detection in remote sensing images based on improved YOLOv8 network. Nonlinear Phenomena in Complex Systems, 2024, vol. 27, no. 3, pp. 278–291. DOI: 10.5281/zenodo.13960639.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Tan M., Le Q. V. EfficientNetV2: Smaller models and faster training, 2021. Available at: https://arxiv.org/abs/2104.00298 (accessed 13.02.2025). DOI: 10.48550/arXiv.2104.00298. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Tan M., Le Q. V. EfficientNetV2: Smaller models and faster training, 2021. Available at: https://arxiv.org/abs/2104.00298 (accessed 13.02.2025). DOI: 10.48550/arXiv.2104.00298. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Woo S., Park J., Lee J. Y., Kweon I. S. CBAM: Convolutional Block Attention Module. Proceedings of the European Conference on Computer Vision (ECCV), Munich, Germany, 8–14 September 2018. Springer, Cham, 2018, pp. 3–19. 
DOI: 10.1007/978-3-030-01234-2_1.</mixed-citation><mixed-citation xml:lang="en">Woo S., Park J., Lee J. Y., Kweon I. S. CBAM: Convolutional Block Attention Module. Proceedings of the European Conference on Computer Vision (ECCV), Munich, Germany, 8–14 September 2018. Springer, Cham, 2018, pp. 3–19. DOI: 10.1007/978-3-030-01234-2_1.</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Wang A., Chen H., Lin Z., Han J., Ding G. RepViT: Revisiting mobile CNN from ViT perspective, 2023. Available at: https://arxiv.org/abs/2307.09283 (accessed 13.02.2025). DOI: 10.48550/arXiv.2307.09283. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Wang A., Chen H., Lin Z., Han J., Ding G. RepViT: Revisiting mobile CNN from ViT perspective, 2023. Available at: https://arxiv.org/abs/2307.09283 (accessed 13.02.2025). DOI: 10.48550/arXiv.2307.09283. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Dosovitskiy A., Beyer L., Kolesnikov A., Weissenborn D., Zhai X., …, Houlsby N. An image is worth 16×16 words: Transformers for image recognition at scale, 2020. Available at: https://arxiv.org/abs/2010.11929 (accessed 13.02.2025). DOI: 10.48550/arXiv.2010.11929. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Dosovitskiy A., Beyer L., Kolesnikov A., Weissenborn D., Zhai X., …, Houlsby N. An image is worth 16×16 words: Transformers for image recognition at scale, 2020. Available at: https://arxiv.org/abs/2010.11929 (accessed 13.02.2025). DOI: 10.48550/arXiv.2010.11929. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Wang X., Zhu D., Yan Y. Towards efficient detection for small objects via attention-guided detection network and data augmentation. Sensors, 2022, vol. 22, no. 19, p. 7663. 
DOI: 10.3390/s22197663.</mixed-citation><mixed-citation xml:lang="en">Wang X., Zhu D., Yan Y. Towards efficient detection for small objects via attention-guided detection network and data augmentation. Sensors, 2022, vol. 22, no. 19, p. 7663. DOI: 10.3390/s22197663.</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Wang A., Chen H., Liu L. H., Chen K., Lin Z. J., …, Ding G. G. YOLOv10: Real-time end-to-end object detection, 2024. Available at: https://doi.org/10.48550/arXiv.2405.14458 (accessed 13.02.2025). DOI: 10.48550/arXiv.2405.14458. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Wang A., Chen H., Liu L. H., Chen K., Lin Z. J., …, Ding G. G. YOLOv10: Real-time end-to-end object detection, 2024. Available at: https://doi.org/10.48550/arXiv.2405.14458 (accessed 13.02.2025). DOI: 10.48550/arXiv.2405.14458. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Wang J., Xu C., Yang W., Yu L. A normalized Gaussian Wasserstein distance for tiny object detection, 2021. Available at: https://doi.org/10.48550/arXiv.2110.13389 (accessed 13.02.2025). DOI: 10.48550/arXiv.2110.13389. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Wang J., Xu C., Yang W., Yu L. A normalized Gaussian Wasserstein distance for tiny object detection, 2021. Available at: https://doi.org/10.48550/arXiv.2110.13389 (accessed 13.02.2025). DOI: 10.48550/arXiv.2110.13389. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Huang H. B., Zhou X. Q., Cao J., He R., Tan T. N. Vision transformer with super token sampling, 2022. Available at: https://doi.org/10.48550/arXiv.2211.11167 (accessed 13.02.2025). DOI: 10.48550/arXiv.2211.11167. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Huang H. B., Zhou X. 
Q., Cao J., He R., Tan T. N. Vision transformer with super token sampling, 2022. Available at: https://doi.org/10.48550/arXiv.2211.11167 (accessed 13.02.2025). DOI: 10.48550/arXiv.2211.11167. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Wan D. H., Lu R., Tian S., Xu T., Lang X., Ren Z. Mixed local channel attention for object detection. Engineering Applications of Artificial Intelligence, 2023, vol. 123, p. 106442. DOI: 10.1016/j.engappai.2023.106442.</mixed-citation><mixed-citation xml:lang="en">Wan D. H., Lu R., Tian S., Xu T., Lang X., Ren Z. Mixed local channel attention for object detection. Engineering Applications of Artificial Intelligence, 2023, vol. 123, p. 106442. DOI: 10.1016/j.engappai.2023.106442.</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Wang H., Ablameyko S. Enhancing small object detection in remote sensing images using mixed local channel attention with YOLOv8. Journal of Computer Technology and Applied Mathematics, 2024, vol. 1, no. 1, pp. 40–45. DOI: 10.5281/zenodo.10986298.</mixed-citation><mixed-citation xml:lang="en">Wang H., Ablameyko S. Enhancing small object detection in remote sensing images using mixed local channel attention with YOLOv8. Journal of Computer Technology and Applied Mathematics, 2024, vol. 1, no. 1, pp. 40–45. DOI: 10.5281/zenodo.10986298.</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Ding X., Zhang X., Ma N., Han J., Ding G., Sun J. RepVGG: Making VGG-style ConvNets great again. Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Nashville, TN, USA, 20–25 June 2021, pp. 13733–13742.</mixed-citation><mixed-citation xml:lang="en">Ding X., Zhang X., Ma N., Han J., Ding G., Sun J. 
RepVGG: Making VGG-style ConvNets great again. Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), Nashville, TN, USA, 20–25 June 2021, pp. 13733–13742.</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Vasa V. K., Zhu W., Chen X., Qiu P., Dong X., Wang Y. STA-Unet: Rethink the semantic redundant for medical imaging segmentation, 2024. Available at: https://arxiv.org/pdf/2410.11578 (accessed 13.02.2025). DOI: 10.48550/arXiv.2410.11578. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Vasa V. K., Zhu W., Chen X., Qiu P., Dong X., Wang Y. STA-Unet: Rethink the semantic redundant for medical imaging segmentation, 2024. Available at: https://arxiv.org/pdf/2410.11578 (accessed 13.02.2025). DOI: 10.48550/arXiv.2410.11578. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Yu Z., Huang H., Chen W., Su Y., Liu Y., Wang X. YOLO-FaceV2: A scale and occlusion aware face detector, 2022. Available at: https://arxiv.org/pdf/2208.02019v2 (accessed 13.02.2025). DOI: 10.48550/arXiv.2208.02019. (Preprint).</mixed-citation><mixed-citation xml:lang="en">Yu Z., Huang H., Chen W., Su Y., Liu Y., Wang X. YOLO-FaceV2: A scale and occlusion aware face detector, 2022. Available at: https://arxiv.org/pdf/2208.02019v2 (accessed 13.02.2025). DOI: 10.48550/arXiv.2208.02019. (Preprint).</mixed-citation></citation-alternatives></ref><ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Chen H., Shi Z. A spatial-temporal attention-based method and a new dataset for remote sensing image change detection. Remote Sensing, 2020, vol. 12, no. 10, p. 1662. DOI: 10.3390/rs12101662.</mixed-citation><mixed-citation xml:lang="en">Chen H., Shi Z. A spatial-temporal attention-based method and a new dataset for remote sensing image change detection. 
Remote Sensing, 2020, vol. 12, no. 10, p. 1662. DOI: 10.3390/rs12101662.</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
