

% Citation entry for the survey paper itself (cite as ma_dl_framework_testing_survey).
% year/volume/number/pages are not recorded here yet and url is still an empty placeholder.
% first-institution is a non-standard annotation field; standard styles ignore it.
@article{ma_dl_framework_testing_survey,
  title             = {A Survey on Deep Learning Frameworks Testing},
  author            = {Ma, Xiang-Yue and Du, Xiao-Ting and Cai, Qing and Zheng, Yang and Hu, Zheng and Zheng, Zheng},
  first-institution = {Beihang University},
  journal           = {Ruan Jian Xue Bao/Journal of Software},
  url               = {},
}






  1. Islam MJ, Nguyen G, Pan R, Rajan H. A comprehensive study on deep learning bug characteristics. In: Proc. of the 2019 27th ACM Joint Meeting on European Software Engineering Conf. and Symp. on the Foundations of Software Engineering (ESEC/FSE). Tallinn Estonia: ACM, 2019: 510–520.[paper]
  2. Du XT, Sui YL, Liu ZH, Ai J. An empirical study of fault triggers in deep learning frameworks. IEEE Trans. on Dependable and Secure Computing, 2022: 1–1.[paper]
  3. Du XT, Xiao GP, Sui YL. Fault triggers in the tensorflow framework: an experience report. In: 2020 IEEE 31st Int’l Symp. on Software Reliability Engineering (ISSRE). Coimbra, Portugal: IEEE, 2020: 1–12.[paper]
  4. Jia L, Zhong H, Wang XY, Huang LP, Lu XS. The symptoms, causes, and repairs of bugs inside a deep learning library. Journal of Systems and Software, 2021, 177: 110935.[paper]
  5. Chen JJ, Liang YH, Shen QC, Jiang JJ, Li SC. Toward understanding deep learning framework bugs. ACM Trans. on Software Engineering and Methodology, 2023, 32(6): 135.[paper]
  6. Quan LL, Guo QY, Xie XF, Chen S, Li XH, Liu Y. Towards understanding the faults of javascript-based deep learning systems. In: Proc. of the 37th IEEE/ACM Int'l Conf. on Automated Software Engineering (ASE). New York, NY, USA: ACM, 2022: 1–13.[paper]
  7. Yang YL, He TX, Xia ZL, Feng Y. A comprehensive empirical study on bug characteristics of deep learning frameworks. Information and Software Technology, 2022, 151: 107004.[paper]
  8. Sun XB, Zhou TC, Li GJ, Hu JJ, Yang H, Li B. An empirical study on real bugs for machine learning programs. In: 2017 24th Asia-Pacific Software Engineering Conf. (APSEC). 2017: 348–357.[paper]
  9. Zhang TY, Gao CY, Ma L, Lyu M, Kim M. An empirical study of common challenges in developing deep learning applications. In: 2019 IEEE 30th Int'l Symp. on Software Reliability Engineering (ISSRE). 2019: 104–115.[paper]


  1. Makkouk T, Kim DJ, Chen T-H P. An empirical study on performance bugs in deep learning frameworks. In: 2022 IEEE Int'l Conf. on Software Maintenance and Evolution (ICSME). 2022: 35–46.[paper]
  2. Ren Y, Gay G, Kästner C, Jamshidi P. Understanding the nature of system-related issues in machine learning frameworks: an exploratory study. arXiv Preprint arXiv:2005.06091, 2020.[paper]
  3. Long GM, Chen T. On reporting performance and accuracy bugs for deep learning frameworks: an exploratory study from github. arXiv Preprint arXiv:2204.07893, 2022.[paper]
  4. Tambon F, Nikanjam A, An L, Khomh F, Antoniol G. Silent bugs in deep learning frameworks: an empirical study of keras and tensorflow. arXiv Preprint arXiv:2112.13314, 2021.[paper]
  5. Liu ZH, Zheng Y, Du XT, Hu Z, Ding WJ, Miao YM, Zheng Z. Taxonomy of aging-related bugs in deep learning libraries. In: 2022 IEEE 33rd Int'l Symp. on Software Reliability Engineering (ISSRE). Charlotte, NC, USA: IEEE, 2022: 423–434.[paper]
  6. Kloberdanz E, Kloberdanz KG, Le W. DeepStability: a study of unstable numerical methods and their solutions in deep learning. In: Proc. of the 44th Int'l Conf. on Software Engineering (ICSE). Pittsburgh Pennsylvania: ACM, 2022: 586–597.[paper]
  7. Huang KF, Chen BH, Wu SS, Cao JM, Ma L, Peng X. Demystifying dependency bugs in deep learning stack. arXiv Preprint arXiv:2207.10347, 2022.[paper]
  8. Liu JK, Huang Q, Xia X, Shihab E, Lo D, Li SP. Is using deep learning frameworks free? characterizing technical debt in deep learning frameworks. In: Proc. of the ACM/IEEE 42nd Int'l Conf. on Software Engineering: Software Engineering in Society (ICSE-SEIS). 2020: 1–10.[paper]
  9. Chen HS, Zhang YP, Cao YR, Xie J. Security issues and defensive approaches in deep learning frameworks. Tsinghua Science and Technology, 2021, 26(6): 894–905.[paper]
  10. Xiao QX, Li K, Zhang DY, Xu WL. Security risks in deep learning implementations. arXiv Preprint arXiv:1711.11008, 2017.[paper]
  11. Filus K, Domańska J. Software vulnerabilities in tensorflow-based deep learning applications. Computers & Security, 2023, 124: 102948.[paper]
  12. Harzevili NS, Shin J, Wang JJ, Wang S. Characterizing and understanding software security vulnerabilities in machine learning libraries. arXiv Preprint arXiv:2203.06502, 2022.[paper]
  13. Long GM, Chen T, Cosma G. Multifaceted hierarchical report identification for non-functional bugs in deep learning frameworks. arXiv Preprint arXiv:2210.01855, 2022.[paper]



  1. Pham HV, Lutellier T, Qi WZ, Tan L. CRADLE: cross-backend validation to detect and localize bugs in deep learning libraries. In: 2019 IEEE/ACM 41st Int'l Conf. on Software Engineering (ICSE). Montreal, QC, Canada: IEEE, 2019: 1027–1038.[paper]
  2. Wang Z, Yan M, Chen JJ, Liu S, Zhang DD. Deep learning library testing via effective model generation. In: Proc. of the 28th ACM Joint Meeting on European Software Engineering Conf. and Symp. on the Foundations of Software Engineering. Virtual Event USA: ACM, 2020: 788–799.[paper]
  3. Guo QY, Xie XF, Li Y, Zhang XY, Liu Y, Li XH, Shen C. Audee: automated testing for deep learning frameworks. In: Proc. of the 35th IEEE/ACM Int’l Conf. on Automated Software Engineering. Virtual Event Australia: ACM, 2020: 486–498.[paper]
  4. Li M, Cao JL, Tian YQ, Li TO, Wen M, Cheung S-C. COMET: coverage-guided model generation for deep learning library testing. arXiv Preprint arXiv:2208.01508, 2022.[paper]
  5. Liu JW, Peng JJ, Wang YY, Zhang LM. NeuRI: diversifying dnn generation via inductive rule inference. arXiv Preprint arXiv:2302.02261, 2023.[paper]
  6. Shen XZ, Zhang JY, Wang XN, Yu HF, Sun G. Deep learning framework fuzzing based on model mutation. In: 2021 IEEE Sixth Int’l Conf. on Data Science in Cyberspace (DSC). 2021: 375–380.[paper]
  7. Li JQ, Li SY, Wu JW, Luo L, Bai Y, Yu HF. MMOS: multi-staged mutation operator scheduling for deep learning library testing. In: 2022 IEEE Global Communications Conference (GLOBECOM). 2022: 6103–6108.[paper]
  8. Wu JW, Li SY, Li JQ, Luo L, Yu HF, Sun G. DeepCov: coverage guided deep learning framework fuzzing. In: 2022 7th IEEE Int’l Conf. on Data Science in Cyberspace (DSC). 2022: 399–404.[paper]
  9. Schumi R, Sun J. ExAIS: executable ai semantics. In: Proc. of the 44th Int’l Conf. on Software Engineering (ICSE). New York, NY, USA: ACM, 2022: 859–870.[paper]
  10. Zou YL, Sun HF, Fang CR, Liu JW, Zhang ZP. Deep learning framework testing via hierarchical and heuristic model generation. Journal of Systems and Software, 2023, 201: 111681.[paper]


  1. Gu JZ, Luo XC, Zhou YF, Wang X. Muffin: testing deep learning libraries via neural architecture fuzzing. In: 2022 IEEE/ACM 44th Int'l Conf. on Software Engineering (ICSE). 2022: 1418–1430.[paper]
  2. Wang JN, Lutellier T, Qian SS, Pham HV, Tan L. EAGLE: creating equivalent graphs to test deep learning libraries. In: Proc. of the 44th Int'l Conf. on Software Engineering (ICSE). Pittsburgh Pennsylvania: ACM, 2022: 798–810.[paper]
  3. Luo WS, Chai D, Ruan XY, Wang J, Fang CR, Chen ZY. Graph-based fuzz testing for deep learning inference engines. In: 2021 IEEE/ACM 43rd Int'l Conf. on Software Engineering (ICSE). 2021: 288–299.[paper]


  1. Christou N, Jin D, Atlidakis V, Ray B, Kemerlis VP. IvySyn: automated vulnerability discovery for deep learning frameworks. arXiv Preprint arXiv:2209.14921, 2022.[paper]
  2. Xie DN, Li YT, Kim M, Pham HV, Tan L, Zhang XY, Godfrey MW. DocTer: documentation guided fuzzing for testing deep learning api functions. In: Proc. of the 31st ACM SIGSOFT Int'l Symp. on Software Testing and Analysis (ISSTA). New York, NY, USA: Association for Computing Machinery, 2022: 176–188.[paper]
  3. Deng YL, Xia CS, Peng HR, Yang CY, Zhang LM. Large language models are zero-shot fuzzers: fuzzing deep-learning libraries via large language models. In: Proc. of the 32nd ACM SIGSOFT Int’l Symp. on Software Testing and Analysis (ISSTA). New York, NY, USA: ACM, 2023: 423–435.[paper]
  4. Deng YL, Yang CY, Wei AJ, Zhang LM. Fuzzing deep-learning libraries via automated relational API inference. In: Proc. of the 30th ACM Joint European Software Engineering Conf. and Symp. on the Foundations of Software Engineering (ESEC/FSE 2022). New York, NY, USA: ACM, 2022: 44–56.[paper]
  5. Wei AJ, Deng YL, Yang CY, Zhang LM. Free lunch for testing: fuzzing deep-learning libraries from open source. In: Proc. of the 44th Int'l Conf. on Software Engineering (ICSE). New York, NY, USA: ACM, 2022: 995–1007.[paper]
  6. Yang CY, Deng YL, Yao JY, Tu YX, Li HC, Zhang LM. Fuzzing automatic differentiation in deep-learning libraries. arXiv Preprint arXiv:2302.04351, 2023.[paper]
  7. Kang HJ, Rattanukul P, Haryono SA, Nguyen TG, Ragkhitwetsagul C, Pasareanu C, Lo D. SkipFuzz: active learning-based input selection for fuzzing deep learning libraries. arXiv Preprint arXiv:2212.04038, 2022.[paper]
  8. Deng YL, Xia CS, Yang CY, Zhang SD, Yang SJ, Zhang LM. Large language models are edge-case fuzzers: testing deep learning libraries via fuzzgpt. arXiv Preprint arXiv:2304.02014, 2023.[paper]
  9. Zhang XF, Liu JW, Sun N, Fang CR, Liu J, Wang J, Chai D, Chen ZY. Duo: differential fuzzing for deep learning operators. IEEE Trans. on Reliability, 2021, 70(4): 1671–1685.[paper]
  10. Zhang XF, Sun N, Fang CR, Liu JW, Liu J, Chai D, Wang J, Chen ZY. Predoo: precision testing of deep learning operators. In: Proc. of the 30th ACM SIGSOFT Int’l Symp. on Software Testing and Analysis (ISSTA). Virtual Denmark: ACM, 2021: 400–412.[paper]
  11. Gu DD, Shi YN, Liu XZ, Wu G, Jiang HO, Zhao YS, Ma Y. Defect detection for deep learning frameworks based on meta operators. Chinese Journal of Computers, 2022, 45(2): 240–255 (in Chinese with English abstract).[paper]
  12. Shi JY, Xiao Y, Li YK, Li YT, Yu DS, Yu CD, Su H, Chen YF, Huo W. ACETest: automated constraint extraction for testing deep learning operators. In: Proc. of the 32nd ACM SIGSOFT Int’l Symp. on Software Testing and Analysis (ISSTA). New York, NY, USA: ACM, 2023: 690–702.[paper]