首页 > 技术文章 > 【JS逆向】之动态cookie

Eliphaz 2021-07-05 20:05 原文

  这类反爬比起前篇讲的加速乐简单很多。笔者遇到的网站的cookie由两个参数组成,我们要做的就是从返回的js中将这两个cookie参数找出来。

如下图,是该网站返回内容,一眼可以看到document.cookie,熟悉的应该都知道,这段js可以生成我们所需要的cookie。

  将代码复制到WebStorm中,新建html文件并将其格式化,然后把JS代码提取出来,对其进行分析清洗(去除无效的参数、函数等)。

改动前js:

// Cookie challenge script as served by the site: it writes two cookies and
// then reloads the page so the next request carries them.
!function () {
        // Token embedded in the response; used verbatim as the first cookie's
        // value and as the seed for the second cookie's value.
        var o = "00000000ba3dc2781e265cb031f086a4c2e00e2c4a686a4c2e00e2c4a688";

        // Write cookie `e` with value `t`, path "/", expiring 864e5 ms (24 h) from now.
        function k(e, t) {
            var o = new Date;
            o.setTime(o.getTime() + 864e5), document.cookie = e + "=" + escape(t) + ";expires=" + o.toGMTString() + ";path=/"
        }

        // Parse `e` (coerced to a string) as an integer in radix `t` and return
        // it as a hexadecimal string.
        function e(e, t) {
            return parseInt(e + "", t).toString(16)
        }

        // Audio-context probe. Returns "nt" when no OfflineAudioContext is
        // available. Otherwise it wires oscillator -> compressor -> destination
        // and starts rendering, then falls off the end without a return
        // statement, so the call itself evaluates to undefined.
        function t() {
            var e = window.OfflineAudioContext || window.webkitOfflineAudioContext;
            if (null == e) return "nt";
            var t = new e(1, 44100, 44100), n = t.createOscillator(), o;
            n.type = "triangle";
            o = t.createDynamicsCompressor();
            // The oncomplete handler's return value is not consumed anywhere in this script.
            void 0 !== o.ratio && "function" == typeof o.ratio.setValueAtTime && o.ratio.setValueAtTime(12, t.currentTime), n.connect(o), o.connect(t.destination), n.start(0), t.startRendering(), t.oncomplete = function () {
                var e;
                try {
                    e = 1838.1212, n.disconnect(), o.disconnect()
                } catch (t) {
                    return t
                }
                return e
            }
        }

        // i and r are the names of the two cookies; n and x are declared but
        // never used at this scope.
        var n, a, l, d, u, x, i = "5df3dc4b7aaea8a", r = "VudF9fX19lEVENT";
        // First cookie: name i, value o (unchanged).
        k(i, o);
        // l: "1" when document.documentMode is truthy or t() did not return "nt"
        //    (|| binds tighter than ?:), otherwise "0".
        // d: "0" when window.screen.colorDepth is falsy, otherwise "1".
        // u: last 4 characters of o.
        // a: the bit string l + d + "11" converted to one hex digit, followed
        //    by seven "f" characters (8 characters in total).
        l = document.documentMode || "nt" !== t() ? "1" : "0", d = "nt" === (window.screen.colorDepth || "nt") ? "0" : "1", u = o.slice(o.length - 4), a = e(l + d + "11", 2) + "fffffff";
        // m: for each of the 8 positions, XOR the hex digit of a with the hex
        // digit of s (the last 8 characters of o). `+` binds tighter than `^`,
        // so both "0x?" strings are built first and then coerced to numbers.
        for (var s = o.slice(o.length - 8), m = "", w = 0; 8 > w; w++) m += e("0x" + a.charAt(w) ^ "0x" + s.charAt(w), 10);
        // Second cookie: name r, value u + m; then reload the page.
        k(r, u + m), window.location.reload()
    }()

分析时先整体看一遍,再看情况打断点。从下图几行代码,可以看出其是生成cookie的关键所在。第一行很明显是告诉我们两个cookie参数的键名是什么。

 k(i, o);该行代码很明显是第一个参数的生成过程,此时可以在k函数处打断点,通过调试发现调用前后o的值不变,故第一个参数就是o,我们可以通过正则

来提取。(在调试的过程中会遇到document is not defined错误,此时补上document={};即可)

第三行代码看起来挺复杂,但是我们不用管太多,打断点调试即可。调试发现缺少window对象,由于我们使用的是nodejs,很明显是缺少浏览器环境的,我们可

以去浏览器console处去调试。

 将其替换成undefined,此时l的结果已经显而易见,JS中||只要有一个为True,结果就是True,即l="1",此时t函数也可以直接删除。

d的调试结果是1,将js中对应代码替换成"1"。

 下面u和m很明显是nodejs可以执行的,所以可以不用管(第四行也是如此)。

k(r,u+m)很明显是第二个参数的生成过程,并且就是u+m的值

最后window.location.reload()是重载代码,可以直接注释

最终改动后js如下所示:

// !function () { // the leading "!" turns the function into an immediately-invoked expression
    var o = "00000000ba3dc2781e265cb031f086a4c2e00e2c4a686a4c2e00e2c4a688"; // value of the first cookie
    document = {}; // document is not defined when run outside a browser, so stub it
    
    // Setter for the first cookie. o is the same before and after the call,
    // so the function is not needed to obtain the value and is disabled.
    // function k(e, t) {
    //     var o = new Date;
    //     o.setTime(o.getTime() + 864e5), document.cookie = e + "=" + escape(t) + ";expires=" + o.toGMTString() + ";path=/"
    // }

    // Parse `e` (coerced to a string) as an integer in radix `t` and return it
    // as a hexadecimal string.
    function e(e, t) {
        return parseInt(e + "", t).toString(16)
    }

    // Browser audio-context probe, disabled: aa() hard-codes l = '1', so the
    // probe result is no longer needed.
    // function t() {
    //     var e = window.OfflineAudioContext || window.webkitOfflineAudioContext;
    //     if (null == e) return "nt";
    //     var t = new e(1, 44100, 44100), n = t.createOscillator(), o;
    //     n.type = "triangle";
    //     o = t.createDynamicsCompressor();
    //     void 0 !== o.ratio && "function" == typeof o.ratio.setValueAtTime && o.ratio.setValueAtTime(12, t.currentTime), n.connect(o), o.connect(t.destination), n.start(0), t.startRendering(), t.oncomplete = function () {
    //         var e;
    //         try {
    //             e = 1838.1212, n.disconnect(), o.disconnect()
    //         } catch (t) {
    //             return t
    //         }
    //         return e
    //     }
    // }

/**
 * Compute the value of the second cookie from the token `o`.
 *
 * The result is the last 4 characters of `o` followed by 8 hex digits, each
 * being the XOR of the corresponding digit of the mask `a` with the
 * corresponding digit of the last 8 characters of `o`.
 *
 * Relies on the helper `e(value, radix)` defined earlier in this script.
 *
 * @returns {string} u + m, the second cookie's value.
 */
function aa() {
    var o = "00000000b9c7f73b9698498a498ac5e781a34d6f092b2e086a44a680e2cc";
    // l and d are the two environment flags, fixed to '1' here.
    // All four names are declared with var: the previous version assigned them
    // undeclared, which leaked globals and throws in strict mode.
    var l = '1', d = '1',
        u = o.slice(o.length - 4),
        // Bit string l + d + "11" as one hex digit, padded with seven "f"s.
        a = e(l + d + "11", 2) + "fffffff";
    // `+` binds tighter than `^`: both "0x?" strings are built first, then
    // coerced to numbers by the XOR.
    for (var s = o.slice(o.length - 8), m = "", w = 0; 8 > w; w++) m += e("0x" + a.charAt(w) ^ "0x" + s.charAt(w), 10);
    return u + m
}
    //window.location.reload()
// }()

相关爬虫代码:

def parse_cookie(self, response):
    """Answer the JavaScript cookie challenge, then re-request the same URL.

    When the response status is not 200, the token ``o="...";`` is read from
    the response body, the second cookie value is computed by running the
    challenge's JavaScript through ``pyv8_engine_service``, and both cookies
    are stored in ``self.headers["cookie"]``. The URL is then requested again
    with ``parse_cookie1`` as callback. A 200 response is re-requested
    directly with ``parse`` as callback.

    Any error is printed and logged; nothing is yielded in that case.
    """
    resp_url = response.url
    resp_meta = copy.deepcopy(response.meta)
    try:
        if response.status != 200:
            # [0] raises IndexError when the token is missing; handled below.
            # NOTE(review): o_str comes from the remote response and is spliced
            # into JavaScript source that gets executed - consider validating
            # it (e.g. hex digits only) before use.
            o_str = re.findall(r'o="(.*?)";', response.text)[0]
            cookie_js = """
function e(e, t) {
return parseInt(e + "", t).toString(16)
}""" + """function aa() {""" + 'var o = "' + o_str + '";' + """l = '1', d = '1',
u = o.slice(o.length - 4),
a = e(l + d + "11", 2) + "fffffff";
for (var s = o.slice(o.length - 8), m = "", w = 0; 8 > w; w++) m += e("0x" + a.charAt(w) ^ "0x" + s.charAt(w), 10);
return u + m}"""
            js_result = pyv8_engine_service(cookie_js, functionName='aa')
            cookie = "5df3dc4b7aaea8a=" + o_str + ";" + "VudF9fX19lEVENT=" + js_result
            self.headers["cookie"] = cookie
            next_callback = self.parse_cookie1
        else:
            next_callback = self.parse
        yield scrapy.Request(url=resp_url, method='GET', headers=self.headers, encoding="utf-8",
                             dont_filter=True, callback=next_callback,
                             meta={**resp_meta, "resp_url": resp_url})
    except Exception:
        # Not a bare ``except``: closing this generator raises GeneratorExit at
        # the ``yield``, which must not be reported as a parse error.
        traceback.print_exc()
        self.logger.info(f"parse error url: {resp_url}")

def parse_cookie1(self, response):
    """Handle the response to the first cookie-bearing retry.

    When the status is still not 200, the token ``o="...";`` is read from the
    body again, the cookies are recomputed via ``pyv8_engine_service`` and
    stored in ``self.headers["cookie"]``. In every case the URL is then
    requested once more with ``parse`` as callback.

    Previously a 200 response produced no request at all and was silently
    dropped; it is now forwarded to ``parse`` in the same way ``parse_cookie``
    forwards its 200 responses.

    Any error is printed and logged; nothing is yielded in that case.
    """
    resp_url = response.url
    resp_meta = copy.deepcopy(response.meta)
    try:
        if response.status != 200:
            # [0] raises IndexError when the token is missing; handled below.
            # NOTE(review): o_str comes from the remote response and is spliced
            # into JavaScript source that gets executed - consider validating
            # it (e.g. hex digits only) before use.
            o_str = re.findall(r'o="(.*?)";', response.text)[0]
            cookie_js = """
function e(e, t) {
return parseInt(e + "", t).toString(16)
}""" + """function aa() {""" + 'var o = "' + o_str + '";' + """l = '1', d = '1',
u = o.slice(o.length - 4),
a = e(l + d + "11", 2) + "fffffff";
for (var s = o.slice(o.length - 8), m = "", w = 0; 8 > w; w++) m += e("0x" + a.charAt(w) ^ "0x" + s.charAt(w), 10);
return u + m}"""
            js_result = pyv8_engine_service(cookie_js, functionName='aa')
            cookie = "5df3dc4b7aaea8a=" + o_str + ";" + "VudF9fX19lEVENT=" + js_result
            self.headers["cookie"] = cookie
        yield scrapy.Request(url=resp_url, method='GET', headers=self.headers, encoding="utf-8",
                             dont_filter=True, callback=self.parse,
                             meta={**resp_meta, "resp_url": resp_url})
    except Exception:
        # Not a bare ``except``: closing this generator raises GeneratorExit at
        # the ``yield``, which must not be reported as a parse error.
        traceback.print_exc()
        self.logger.info(f"parse error url: {resp_url}")

推荐阅读