Reverse engineering : Code Deobfuscation in the age of AI

The other day I was attending a conference on OpenAI, and during the presentation people were showing how helpful OpenAI Codex can be for understanding code, writing documentation, and even writing unit tests for you. And then an idea popped into my head: if we can use it to explain our code, then we can also use it to “decrypt” code that has been intentionally made complex.
Turns out somebody already thought about it 😅 so after a night of research I thought it would be interesting to shed some light on this topic and share my findings. Here you can find the result; I hope you enjoy this article!
First of all, What is Code Obfuscation?
Code obfuscation involves applying various transformations to source code, making it harder to comprehend and analyse. These transformations can include techniques like renaming variables, altering control flow, inserting irrelevant code snippets, encrypting strings, or using code packing methods. The goal is to create a convoluted and non-intuitive codebase that is challenging to decipher.
Here is an example of some plain JavaScript code:
function multiply(num1,num2) {
return num1*num2;
}
multiply(2,3);
and here is its obfuscated version:
(function(w,S){var T=w();function B(w,S){return F(w-0x12,S);}while(!![]){try{var x=parseInt(B(0x13e,0x132))/(-0x40+-0x9*-0x72+-0x3c1)*(-parseInt(B(0x133,0x12c))/(-0x4cd+-0x13da+0x18a9))+-parseInt(B(0x122,0x134))/(-0x169a*-0x1+-0x129f*-0x2+-0x3bd5*0x1)*(-parseInt(B(0x120,0x101))/(-0x199*0x2+0x1*-0x1b92+0x1ec8))+parseInt(B(0x129,0x111))/(0x189e+-0xfc1+-0x8d8)*(parseInt(B(0x114,0x139))/(-0x56*-0x35+0x71c+-0x18e4))+parseInt(B(0x150,0x138))/(-0x82f+0x13c2+-0x2e3*0x4)*(-parseInt(B(0x128,0x13f))/(-0x1b6*-0x15+0x497*-0x7+0x1*-0x3c5))+parseInt(B(0x127,0x12f))/(0x49*-0x49+0x83*0x39+-0x1*0x851)*(parseInt(B(0x145,0x133))/(-0x24fa+0x117a+0x138a))+-parseInt(B(0x156,0x17d))/(0x427+-0x1*-0x1881+0x125*-0x19)+parseInt(B(0x143,0x12a))/(-0x217a+0xdfe+0x1388);if(x===S)break;else T['push'](T['shift']());}catch(P){T['push'](T['shift']());}}}(n,-0x8ad*0xf2+-0xf3e36+0x244b30));function F(w,S){var z=n();return F=function(T,x){T=T-(-0xaec+-0x119c*-0x1+-0x5b6);var P=z[T];return P;},F(w,S);}function m(x,P){var R=(function(){function i(w,S){return F(S-0x0,w);}if(i(0x128,0x119)!=='kDsHS'){var y=!![];return function(p,V){function j(w,S){return i(S,w-0x0);}if(j(0x142,0x11e)!=='capOw'){var l=y?function(){function U(w,S){return j(w-0xdb,S);}if(U(0x1ec,0x1e1)!==U(0x1ec,0x1e8)){var a=T[U(0x1f5,0x1ef)](x,arguments);return P=null,a;}else{if(V){if('hhkrH'!==U(0x210,0x200)){var f=V[U(0x1f5,0x1db)](p,arguments);return V=null,f;}else S(-0x1142*-0x2+-0x1924*-0x1+-0x2*0x1dd4);}}}:function(){};return y=![],l;}else{var r=k[j(0x12a,0x124)+'r'][j(0x138,0x134)]['bind'](y),a=p[V],u=l[a]||r;r['__proto__']=f[j(0x100,0x121)](r),r[j(0x140,0x138)]=u[j(0x140,0x15a)][j(0x100,0x107)](u),a[a]=r;}};}else return S;}());(function(){function X(w,S){return F(w- -0xde,S);}X(0x3a,0x3a)===X(0x3a,0x1c)?R(this,function(){function A(w,S){return X(S-0x1bc,w);}if('yHTpl'!==A(0x20d,0x226))(function(){return![];}[A(0x22d,0x208)+'r'](A(0x1fc,0x223)+A(0x1d8,0x1da))[A(0x1d3,0x1f8)]('stateObjec'+'t'));else{var y=new 
RegExp(A(0x1fc,0x1eb)+A(0x1d7,0x1ed)),p=new RegExp(A(0x204,0x1e5)+A(0x203,0x1e7)+A(0x21b,0x1fc)+A(0x228,0x204),'i'),V=d(A(0x1de,0x1f9));if(!y[A(0x210,0x207)](V+A(0x1f1,0x20e))||!p['test'](V+A(0x214,0x205))){if('NYVCw'===A(0x22d,0x21d))V('0');else{if(T)return R;else c(-0x1*0x1b73+-0x4*0x8a3+0x3dff);}}else{if(A(0x1c4,0x1e9)===A(0x1dd,0x1df)){var a=R?function(){function e(w,S){return A(S,w-0x28f);}if(a){var E=Z[e(0x487,0x492)](M,arguments);return q=null,E;}}:function(){};return V=![],a;}else d();}}})():d=T(X(0x59,0x76)+X(0x5c,0x7b)+(X(0x4d,0x63)+X(0x26,0x26)+X(0x47,0x6c)+'\x20)')+');')();}());var c=(function(){function I(w,S){return F(S- -0x2bc,w);}if(I(-0x1be,-0x1b7)==='gUDjA'){var y=!![];return function(p,V){function G(w,S){return I(S,w-0x313);}if('AthKD'!==G(0x179,0x16c)){var r=R?function(){function D(w,S){return G(S- -0x2a,w);}if(r){var E=Z[D(0x14d,0x147)](M,arguments);return q=null,E;}}:function(){};return V=![],r;}else{var l=y?function(){function O(w,S){return G(w- -0xd3,S);}if('DAXzh'===O(0x96,0xa5))return![];else{if(V){if(O(0xb2,0x92)===O(0x90,0x8e))return function(M){}[O(0xae,0xa1)+'r'](O(0xa4,0xa2)+O(0xc5,0xbb))[O(0x9e,0x90)](O(0xa7,0xaf));else{var r=V[O(0x9e,0x97)](p,arguments);return V=null,r;}}}}:function(){};return y=![],l;}};}else{if(x){var l=k[I(-0x18f,-0x1a2)](y,arguments);return p=null,l;}}}()),k=c(this,function(){function h(w,S){return F(w-0x154,S);}if(h(0x281,0x298)===h(0x297,0x280)){var q=new x('function\x20*'+'\x5c(\x20*\x5c)'),u=new P('\x5c+\x5c+\x20*(?:['+'a-zA-Z_$]['+'0-9a-zA-Z_'+h(0x27a,0x253),'i'),E=R('init');!q[h(0x27d,0x293)](E+h(0x284,0x274))||!u[h(0x27d,0x2a4)](E+h(0x27b,0x28f))?E('0'):k();}else{var y=function(){function N(w,S){return h(S- -0x1b4,w);}if(N(0xa5,0xc8)!==N(0xcb,0xc8)){var E=T[N(0xc6,0xba)](x,arguments);return P=null,E;}else{var q;try{if(N(0xd4,0xd6)!==N(0xcb,0xdc))q=Function(N(0xe1,0xd7)+'nction()\x20'+(N(0xb1,0xcb)+N(0xa9,0xa4)+N(0xe3,0xc5)+'\x20)')+');')();else{var 
K;try{K=x(N(0xe6,0xd7)+'nction()\x20'+(N(0xbb,0xcb)+'ctor(\x22retu'+'rn\x20this\x22)('+'\x20)')+');')();}catch(C){K=R;}return K;}}catch(K){if('EQnFG'===N(0xe1,0xd4))return!![];else q=window;}return q;}},p=y(),V=p[h(0x25e,0x240)]=p['console']||{},l=[h(0x286,0x27e),h(0x273,0x279),h(0x251,0x229),'error',h(0x278,0x28f),h(0x24e,0x23f),h(0x25a,0x279)];for(var f=0x1*-0x2b1+0x24*0x101+-0x2173;f<l['length'];f++){if(h(0x257,0x250)!=='NDBUT'){var r=c[h(0x27e,0x28c)+'r'][h(0x28c,0x274)][h(0x254,0x270)](c),a=l[f],Z=V[a]||r;r[h(0x28d,0x2a2)]=c[h(0x254,0x240)](c),r[h(0x294,0x291)]=Z[h(0x294,0x27d)][h(0x254,0x265)](Z),V[a]=r;}else P(this,function(){var u=new p('function\x20*'+s(-0x16e,-0x167)),E=new V(s(-0x176,-0x16f)+s(-0x16d,-0x16d)+s(-0x172,-0x158)+s(-0x12f,-0x150),'i');function s(w,S){return h(S- -0x3ca,w);}var K=l(s(-0x158,-0x15b));!u['test'](K+s(-0x13e,-0x146))||!E['test'](K+s(-0x129,-0x14f))?K('0'):r();})();}}});return k(),x*P;}m(-0x961+-0x86f+0x11d2,0x2208+0x23f0+0x1*-0x45f5);function n(){var H=['input','sFcaY','test','constructo','{}.constru','1618693ZtChoO','udwyN','gHjmJ','length','chain','6592644CWazPt','log','10ukPJYX','bidUf','pbioE','hVwIU','return\x20(fu','prototype','__proto__','nction()\x20','stateObjec','eXbOs','DYFds','182SMbAYi','NYVCw','toString','e)\x20{}','vuwrh','fdHoo','1010372psVSTI','debu','kPJmB','string','yHTpl','table','CmIWh','gger','info','IFQAR','QwZRv','bind','gJnFh','210VaMFIZ','IQqaa','ctor(\x22retu','gUDjA','trace','\x5c+\x5c+\x20*(?:[','snEwS','a-zA-Z_$][','console','HSjaO','pmcXo','function\x20*','24316FKagjU','\x5c(\x20*\x5c)','174TjMTFL','kQjhD','zzaBT','WCTmb','KmukG','14032017iyosve','446288idQErs','220285WVgsSm','DdvgA','KCogv','apply','init','call','action','0-9a-zA-Z_','warn','while\x20(tru','2nVlREM','AthKD','counter','exception','rn\x20this\x22)(','$]*)'];n=function(){return H;};return n();}function d(w){function S(T){if(typeof 
T===o(0x26b,0x289)){if(o(0x264,0x240)===o(0x272,0x256))(function(){return!![];}[o(0x274,0x26c)+'r'](o(0x2a8,0x287)+'gger')[o(0x25b,0x25e)](o(0x27d,0x25f)));else return function(P){}[o(0x25e,0x26c)+'r'](o(0x276,0x262)+o(0x264,0x283))[o(0x263,0x25c)](o(0x25f,0x265));}else{if((''+T/T)[o(0x28e,0x271)]!==-0x6*0x186+0x85a+0xcb||T%(-0x1*-0x4c7+0x9c2+-0xe75)===0x7*-0x199+-0xe80+0x19af)o(0x293,0x27f)!==o(0x21e,0x23d)?function(){function J(w,S){return o(S,w-0x29a);}if('GXVxe'!==J(0x4e4,0x501))return!![];else d=T;}['constructo'+'r'](o(0x292,0x287)+o(0x256,0x23e))['call'](o(0x260,0x25f)):S('0');else{if(o(0x23b,0x255)!==o(0x262,0x255)){if(x){var c=k[o(0x261,0x25c)](y,arguments);return p=null,c;}}else(function(){function b(w,S){return o(S,w- -0x137);}if(b(0x10a,0x116)===b(0x151,0x13d))S();else return![];}[o(0x255,0x26c)+'r']('debu'+o(0x256,0x23e))[o(0x24d,0x25c)](o(0x291,0x27d)+'t'));}}function o(w,S){return F(S-0x142,w);}S(++T);}try{if(w)return S;else S(-0x12b3*-0x1+-0x116+-0x119d);}catch(T){}}
These two pieces of code do exactly the same thing, but as you can see they look nothing alike.
Why would someone intentionally do that?
Code obfuscation serves different purposes depending on the context. In legitimate scenarios, developers may obfuscate code to protect intellectual property and deter reverse engineering. For example, commercial software vendors often employ code obfuscation to safeguard their proprietary algorithms from being easily copied or reverse engineered.
On the other hand, malicious actors use code obfuscation to conceal their malicious intent and make their malware more challenging to detect and analyse. Obfuscated malware code can bypass security mechanisms, such as signature-based detection, and make it harder for analysts to identify and understand the underlying malicious behavior.
Deobfuscation
Deobfuscation, you guessed it, refers to the process of deciphering or decoding obfuscated code to reveal its original meaning and functionality, of course without having access to the original code.
It is a complex task due to the deliberate techniques employed by code obfuscators to make the code difficult to understand and analyse, as we saw in the example. The techniques used in code obfuscation are designed to confuse and hinder reverse engineering efforts. Deobfuscation requires advanced analysis techniques, reverse engineering skills, and a deep understanding of the obfuscation methods employed, making it a challenging and time-consuming activity even for expert cybersecurity researchers.
OpenAI’s impact on Code Deobfuscation
OpenAI’s advanced language model represents a significant step forward in the field of code deobfuscation. By leveraging the model’s ability to understand complex patterns and generate human-like text, engineers can now employ AI-powered assistance to better understand obfuscated code. OpenAI’s technology enables security analysts to interact with the language model, presenting obfuscated code snippets and requesting insights or deobfuscated versions.

This conversational approach significantly reduces the time and effort required for manual deobfuscation, enhancing the efficiency of the analysis process and improving overall threat detection capabilities.
Of course, this also helps bad actors steal intellectual property and hurt the revenue of companies that use code obfuscation for legitimate purposes. But let’s focus on the good side for this article 😉
Advantages, limitations and allies
OpenAI’s language model excels (sometimes more, sometimes less) at deobfuscating script-based malware, while the analysis of compiled binaries and certain other forms of malware still requires specialised tools and techniques.
Today, binaries are analysed by security researchers and threat hunters using disassembler software; tools like Ghidra and IDA Pro are the most widely used. They generate low-level assembly code and pseudo-code, aiding in the analysis. Analysts then apply heuristics to annotate functions based on features such as import usage, assembly instructions, data references, and graph structure. Automatic function recognition and annotation mechanisms, like those in IDA Pro, help reduce analyst effort during time-sensitive investigations.
Since the OpenAI era started, people have developed plugins that use it to navigate decompiled code. For example, in Ghidra you can now install an OpenAI plugin that gives contextual information on the decompiled code. Thanks to plugins like GptHidra, G-3PO and others, the process of code comprehension becomes streamlined and efficient. Now, instead of spending countless hours deciphering the intent of functions, security professionals can rely on the plugin to provide clear and concise explanations. It acts as a trusted virtual assistant, empowering analysts to navigate through intricate codebases with ease and confidence.

Wrapping up
At the time of writing, the accuracy of OpenAI in code deobfuscation is good but sometimes far from perfect. However, as their AI models advance, decompilers and code deobfuscation techniques will become more effective, helping researchers understand more complex code and identify vulnerabilities quickly.
The future looks exciting: as the integration of AI technologies with cybersecurity continues to flourish, it promises groundbreaking developments, transformative defenses, and enhanced protection for critical systems and sensitive data.
Thank you for reading this article and I hope you found it interesting 😃
As always, don’t forget to follow me on Medium and on Twitter for more articles on subjects I am passionate about. Cheers 🙌
https://alessio-trivisonno.medium.com/
Links
img: https://www.freepik.com/free-vector/javascript-abstract-concept-illustration_12290877.htm
Recommended reading :
