1
0
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-12-16 14:36:47 +00:00
This commit is contained in:
dirkf 2024-12-13 03:15:44 +00:00 committed by GitHub
commit 87ba614c4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 541 additions and 186 deletions

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
@ -11,7 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import math import math
import re import re
from youtube_dl.compat import compat_str from youtube_dl.compat import compat_str as str
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
NaN = object() NaN = object()
@ -19,7 +20,7 @@ NaN = object()
class TestJSInterpreter(unittest.TestCase): class TestJSInterpreter(unittest.TestCase):
def _test(self, jsi_or_code, expected, func='f', args=()): def _test(self, jsi_or_code, expected, func='f', args=()):
if isinstance(jsi_or_code, compat_str): if isinstance(jsi_or_code, str):
jsi_or_code = JSInterpreter(jsi_or_code) jsi_or_code = JSInterpreter(jsi_or_code)
got = jsi_or_code.call_function(func, *args) got = jsi_or_code.call_function(func, *args)
if expected is NaN: if expected is NaN:
@ -40,16 +41,27 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return 42 + 7;}', 49) self._test('function f(){return 42 + 7;}', 49)
self._test('function f(){return 42 + undefined;}', NaN) self._test('function f(){return 42 + undefined;}', NaN)
self._test('function f(){return 42 + null;}', 42) self._test('function f(){return 42 + null;}', 42)
self._test('function f(){return 1 + "";}', '1')
self._test('function f(){return 42 + "7";}', '427')
self._test('function f(){return false + true;}', 1)
self._test('function f(){return "false" + true;}', 'falsetrue')
self._test('function f(){return '
'1 + "2" + [3,4] + {k: 56} + null + undefined + Infinity;}',
'123,4[object Object]nullundefinedInfinity')
def test_sub(self): def test_sub(self):
self._test('function f(){return 42 - 7;}', 35) self._test('function f(){return 42 - 7;}', 35)
self._test('function f(){return 42 - undefined;}', NaN) self._test('function f(){return 42 - undefined;}', NaN)
self._test('function f(){return 42 - null;}', 42) self._test('function f(){return 42 - null;}', 42)
self._test('function f(){return 42 - "7";}', 35)
self._test('function f(){return 42 - "spam";}', NaN)
def test_mul(self): def test_mul(self):
self._test('function f(){return 42 * 7;}', 294) self._test('function f(){return 42 * 7;}', 294)
self._test('function f(){return 42 * undefined;}', NaN) self._test('function f(){return 42 * undefined;}', NaN)
self._test('function f(){return 42 * null;}', 0) self._test('function f(){return 42 * null;}', 0)
self._test('function f(){return 42 * "7";}', 294)
self._test('function f(){return 42 * "eggs";}', NaN)
def test_div(self): def test_div(self):
jsi = JSInterpreter('function f(a, b){return a / b;}') jsi = JSInterpreter('function f(a, b){return a / b;}')
@ -57,17 +69,26 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, NaN, args=(JS_Undefined, 1)) self._test(jsi, NaN, args=(JS_Undefined, 1))
self._test(jsi, float('inf'), args=(2, 0)) self._test(jsi, float('inf'), args=(2, 0))
self._test(jsi, 0, args=(0, 3)) self._test(jsi, 0, args=(0, 3))
self._test(jsi, 6, args=(42, 7))
self._test(jsi, 0, args=(42, float('inf')))
self._test(jsi, 6, args=("42", 7))
self._test(jsi, NaN, args=("spam", 7))
def test_mod(self): def test_mod(self):
self._test('function f(){return 42 % 7;}', 0) self._test('function f(){return 42 % 7;}', 0)
self._test('function f(){return 42 % 0;}', NaN) self._test('function f(){return 42 % 0;}', NaN)
self._test('function f(){return 42 % undefined;}', NaN) self._test('function f(){return 42 % undefined;}', NaN)
self._test('function f(){return 42 % "7";}', 0)
self._test('function f(){return 42 % "beans";}', NaN)
def test_exp(self): def test_exp(self):
self._test('function f(){return 42 ** 2;}', 1764) self._test('function f(){return 42 ** 2;}', 1764)
self._test('function f(){return 42 ** undefined;}', NaN) self._test('function f(){return 42 ** undefined;}', NaN)
self._test('function f(){return 42 ** null;}', 1) self._test('function f(){return 42 ** null;}', 1)
self._test('function f(){return undefined ** 0;}', 1)
self._test('function f(){return undefined ** 42;}', NaN) self._test('function f(){return undefined ** 42;}', NaN)
self._test('function f(){return 42 ** "2";}', 1764)
self._test('function f(){return 42 ** "spam";}', NaN)
def test_calc(self): def test_calc(self):
self._test('function f(a){return 2*a+1;}', 7, args=[3]) self._test('function f(a){return 2*a+1;}', 7, args=[3])
@ -89,7 +110,35 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return 19 & 21;}', 17) self._test('function f(){return 19 & 21;}', 17)
self._test('function f(){return 11 >> 2;}', 2) self._test('function f(){return 11 >> 2;}', 2)
self._test('function f(){return []? 2+3: 4;}', 5) self._test('function f(){return []? 2+3: 4;}', 5)
# equality
self._test('function f(){return 1 == 1}', True)
self._test('function f(){return 1 == 1.0}', True)
self._test('function f(){return 1 == "1"}', True)
self._test('function f(){return 1 == 2}', False) self._test('function f(){return 1 == 2}', False)
self._test('function f(){return 1 != "1"}', False)
self._test('function f(){return 1 != 2}', True)
self._test('function f(){var x = {a: 1}; var y = x; return x == y}', True)
self._test('function f(){var x = {a: 1}; return x == {a: 1}}', False)
self._test('function f(){return NaN == NaN}', False)
self._test('function f(){return null == undefined}', True)
self._test('function f(){return "spam, eggs" == "spam, eggs"}', True)
# strict equality
self._test('function f(){return 1 === 1}', True)
self._test('function f(){return 1 === 1.0}', True)
self._test('function f(){return 1 === "1"}', False)
self._test('function f(){return 1 === 2}', False)
self._test('function f(){var x = {a: 1}; var y = x; return x === y}', True)
self._test('function f(){var x = {a: 1}; return x === {a: 1}}', False)
self._test('function f(){return NaN === NaN}', False)
self._test('function f(){return null === undefined}', False)
self._test('function f(){return null === null}', True)
self._test('function f(){return undefined === undefined}', True)
self._test('function f(){return "uninterned" === "uninterned"}', True)
self._test('function f(){return 1 === 1}', True)
self._test('function f(){return 1 === "1"}', False)
self._test('function f(){return 1 !== 1}', False)
self._test('function f(){return 1 !== "1"}', True)
# expressions
self._test('function f(){return 0 && 1 || 2;}', 2) self._test('function f(){return 0 && 1 || 2;}', 2)
self._test('function f(){return 0 ?? 42;}', 0) self._test('function f(){return 0 ?? 42;}', 0)
self._test('function f(){return "life, the universe and everything" < 42;}', False) self._test('function f(){return "life, the universe and everything" < 42;}', False)
@ -111,7 +160,6 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51) self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11) self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
@unittest.skip('Not yet fully implemented')
def test_comments(self): def test_comments(self):
self._test(''' self._test('''
function f() { function f() {
@ -130,6 +178,15 @@ class TestJSInterpreter(unittest.TestCase):
} }
''', 3) ''', 3)
self._test('''
function f() {
var x = ( /* 1 + */ 2 +
/* 30 * 40 */
50);
return x;
}
''', 52)
def test_precedence(self): def test_precedence(self):
self._test(''' self._test('''
function f() { function f() {
@ -266,7 +323,20 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5) self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
def test_void(self): def test_void(self):
self._test('function f() { return void 42; }', None) self._test('function f() { return void 42; }', JS_Undefined)
def test_typeof(self):
self._test('function f() { return typeof undefined; }', 'undefined')
self._test('function f() { return typeof NaN; }', 'number')
self._test('function f() { return typeof Infinity; }', 'number')
self._test('function f() { return typeof true; }', 'boolean')
self._test('function f() { return typeof null; }', 'object')
self._test('function f() { return typeof "a string"; }', 'string')
self._test('function f() { return typeof 42; }', 'number')
self._test('function f() { return typeof 42.42; }', 'number')
self._test('function f() { var g = function(){}; return typeof g; }', 'function')
self._test('function f() { return typeof {key: "value"}; }', 'object')
# not yet implemented: Symbol, BigInt
def test_return_function(self): def test_return_function(self):
jsi = JSInterpreter(''' jsi = JSInterpreter('''
@ -283,7 +353,7 @@ class TestJSInterpreter(unittest.TestCase):
def test_undefined(self): def test_undefined(self):
self._test('function f() { return undefined === undefined; }', True) self._test('function f() { return undefined === undefined; }', True)
self._test('function f() { return undefined; }', JS_Undefined) self._test('function f() { return undefined; }', JS_Undefined)
self._test('function f() {return undefined ?? 42; }', 42) self._test('function f() { return undefined ?? 42; }', 42)
self._test('function f() { let v; return v; }', JS_Undefined) self._test('function f() { let v; return v; }', JS_Undefined)
self._test('function f() { let v; return v**0; }', 1) self._test('function f() { let v; return v**0; }', 1)
self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }', self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
@ -324,6 +394,16 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f() { let a; return a?.qq; }', JS_Undefined) self._test('function f() { let a; return a?.qq; }', JS_Undefined)
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined) self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
def test_indexing(self):
self._test('function f() { return [1, 2, 3, 4][3]}', 4)
self._test('function f() { return [1, [2, [3, [4]]]][1][1][1][0]}', 4)
self._test('function f() { var o = {1: 2, 3: 4}; return o[3]}', 4)
self._test('function f() { var o = {1: 2, 3: 4}; return o["3"]}', 4)
self._test('function f() { return [1, [2, {3: [4]}]][1][1]["3"][0]}', 4)
self._test('function f() { return [1, 2, 3, 4].length}', 4)
self._test('function f() { var o = {1: 2, 3: 4}; return o.length}', JS_Undefined)
self._test('function f() { var o = {1: 2, 3: 4}; o["length"] = 42; return o.length}', 42)
def test_regex(self): def test_regex(self):
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None) self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
@ -411,6 +491,13 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, 't-e-s-t', args=[test_input, '-']) self._test(jsi, 't-e-s-t', args=[test_input, '-'])
self._test(jsi, '', args=[[], '-']) self._test(jsi, '', args=[[], '-'])
self._test('function f(){return '
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
'1,1,abc,[object Object],,,Infinity,NaN')
self._test('function f(){return '
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
'1~1~abc~[object Object]~~~Infinity~NaN')
def test_split(self): def test_split(self):
test_result = list('test') test_result = list('test')
tests = [ tests = [
@ -424,6 +511,18 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, test_result, args=['t-e-s-t', '-']) self._test(jsi, test_result, args=['t-e-s-t', '-'])
self._test(jsi, [''], args=['', '-']) self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', '']) self._test(jsi, [], args=['', ''])
# RegExp split
self._test('function f(){return "test".split(/(?:)/)}',
['t', 'e', 's', 't'])
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
['t', 't'])
# from MDN: surrogate pairs aren't handled: case 1 fails
# self._test('function f(){return "😄😄".split(/(?:)/)}',
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
# case 2 beats Py3.2: it gets the case 1 result
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
self._test('function f(){return "😄😄".split(/(?:)/u)}',
['😄', '😄'])
def test_slice(self): def test_slice(self):
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8]) self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
@ -453,6 +552,40 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return "012345678".slice(-1, 1)}', '') self._test('function f(){return "012345678".slice(-1, 1)}', '')
self._test('function f(){return "012345678".slice(-3, -1)}', '67') self._test('function f(){return "012345678".slice(-3, -1)}', '67')
def test_pop(self):
# pop
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
[8, [0, 1, 2, 3, 4, 5, 6, 7]])
self._test('function f(){return [].pop()}', JS_Undefined)
# push
self._test('function f(){var a = [0, 1, 2]; return [a.push(3, 4), a]}',
[5, [0, 1, 2, 3, 4]])
self._test('function f(){var a = [0, 1, 2]; return [a.push(), a]}',
[3, [0, 1, 2]])
def test_shift(self):
# shift
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.shift(), a]}',
[0, [1, 2, 3, 4, 5, 6, 7, 8]])
self._test('function f(){return [].shift()}', JS_Undefined)
# unshift
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(3, 4), a]}',
[5, [3, 4, 0, 1, 2]])
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(), a]}',
[3, [0, 1, 2]])
def test_forEach(self):
self._test('function f(){var ret = []; var l = [4, 2]; '
'var log = function(e,i,a){ret.push([e,i,a]);}; '
'l.forEach(log); '
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
[2, 4, 1, [4, 2]])
self._test('function f(){var ret = []; var l = [4, 2]; '
'var log = function(e,i,a){this.push([e,i,a]);}; '
'l.forEach(log, ret); '
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
[2, 4, 1, [4, 2]])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
@ -12,6 +13,7 @@ import re
import string import string
from youtube_dl.compat import ( from youtube_dl.compat import (
compat_contextlib_suppress,
compat_open as open, compat_open as open,
compat_str, compat_str,
compat_urlretrieve, compat_urlretrieve,
@ -50,23 +52,33 @@ _SIG_TESTS = [
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
84, 84,
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
83, 83,
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
), ),
( (
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
) ),
(
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
] ]
_NSIG_TESTS = [ _NSIG_TESTS = [
@ -142,6 +154,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
), ),
(
'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
),
( (
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A', 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
@ -154,6 +170,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ', 'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
), ),
(
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
),
( (
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
@ -182,6 +202,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
), ),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
),
(
'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
),
] ]
@ -216,11 +244,9 @@ class TestSignature(unittest.TestCase):
os.mkdir(self.TESTDATA_DIR) os.mkdir(self.TESTDATA_DIR)
def tearDown(self): def tearDown(self):
try: with compat_contextlib_suppress(OSError):
for f in os.listdir(self.TESTDATA_DIR): for f in os.listdir(self.TESTDATA_DIR):
os.remove(f) os.remove(f)
except OSError:
pass
def t_factory(name, sig_func, url_pattern): def t_factory(name, sig_func, url_pattern):
@ -254,11 +280,12 @@ def signature(jscode, sig_input):
def n_sig(jscode, sig_input): def n_sig(jscode, sig_input):
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
return JSInterpreter(jscode).call_function(funcname, sig_input) return JSInterpreter(jscode).call_function(
funcname, sig_input, _ytdl_do_not_return=sig_input)
make_sig_test = t_factory( make_sig_test = t_factory(
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) 'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
for test_spec in _SIG_TESTS: for test_spec in _SIG_TESTS:
make_sig_test(*test_spec) make_sig_test(*test_spec)

View File

@ -3170,7 +3170,7 @@ class InfoExtractor(object):
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
# of jwplayer.flash.swf # of jwplayer.flash.swf
rtmp_url_parts = re.split( rtmp_url_parts = re.split(
r'((?:mp4|mp3|flv):)', source_url, 1) r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
if len(rtmp_url_parts) == 3: if len(rtmp_url_parts) == 3:
rtmp_url, prefix, play_path = rtmp_url_parts rtmp_url, prefix, play_path = rtmp_url_parts
a_format.update({ a_format.update({

View File

@ -1579,19 +1579,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.to_screen('Extracted signature function:\n' + code) self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode): def _parse_sig_js(self, jscode):
# Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")};
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
# {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
# sig=function(J){J=J.split(""); ... ;return J.join("")};
# ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
# {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
funcname = self._search_regex( funcname = self._search_regex(
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', (r'\b(?P<var>[a-zA-Z0-9$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
# Old patterns
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
# Obsolete patterns # Obsolete patterns
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig') jscode, 'Initial JS player signature function name', group='sig')
@ -1658,36 +1665,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c) # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b) # or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
# or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s* # old: (b=a.get("n"))&&(b=narray[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# older: (b=a.get("n"))&&(b=nfunc(b) # older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x) r'''(?x)
\((?:[\w$()\s]+,)*?\s* # ( # (expr, ...,
(?P<b>[a-z])\s*=\s* # b= \((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)*
(?: # b=...
(?: # expect ,c=a.get(b) (etc) (?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?:
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| \.\s*[\w$]+ |
"n+"\[\s*\+?s*[\w$.]+\s*] \[\s*[\w$]+\s*\] |
)\s*(?:,[\w$()\s]+(?=,))*| \.\s*get\s*\(\s*[\w$"]+\s*\)
(?P<old>[\w$]+) # a (old[er]) )\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s*
)\s* \)\s*&&\s*\( # ...)&&(
(?(old) # b = nfunc, b = narray[idx]
# b.get("n") (?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? (?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\)) # (...)
| # ,c=a.get(b) \(\s*[\w$]+\s*\)
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'), ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None)) default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611 # thx bashonly: yt-dlp/yt-dlp/pull/10611
@ -1697,15 +1697,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'''(?xs) r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $ (?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\) (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?["']enhanced_except_ \s*\{(?:(?!};).)+?(?:
["']enhanced_except_ |
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+
)
''', jscode, 'Initial JS player n function name', group='name') ''', jscode, 'Initial JS player n function name', group='name')
if not idx: if not idx:
return func_name return func_name
return self._parse_json(self._search_regex( return self._search_json(
r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode, r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)), 'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
func_name, transform_source=js_to_json)[int(idx)] func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
transform_source=js_to_json)[int(idx)]
def _extract_n_function_code(self, video_id, player_url): def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)
@ -1728,13 +1732,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_nsig(s): def extract_nsig(s):
try: try:
ret = func([s]) ret = func([s], kwargs={'_ytdl_do_not_return': s})
except JSInterpreter.Exception: except JSInterpreter.Exception:
raise raise
except Exception as e: except Exception as e:
raise JSInterpreter.Exception(traceback.format_exc(), cause=e) raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
if ret.startswith('enhanced_except_'): if ret.startswith('enhanced_except_') or ret.endswith(s):
raise JSInterpreter.Exception('Signature function returned an exception') raise JSInterpreter.Exception('Signature function returned an exception')
return ret return ret

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools import itertools
@ -5,11 +6,12 @@ import json
import operator import operator
import re import re
from functools import update_wrapper from functools import update_wrapper, wraps
from .utils import ( from .utils import (
error_to_compat_str, error_to_compat_str,
ExtractorError, ExtractorError,
float_or_none,
js_to_json, js_to_json,
remove_quotes, remove_quotes,
unified_timestamp, unified_timestamp,
@ -20,9 +22,11 @@ from .compat import (
compat_basestring, compat_basestring,
compat_chr, compat_chr,
compat_collections_chain_map as ChainMap, compat_collections_chain_map as ChainMap,
compat_contextlib_suppress,
compat_filter as filter, compat_filter as filter,
compat_itertools_zip_longest as zip_longest, compat_itertools_zip_longest as zip_longest,
compat_map as map, compat_map as map,
compat_numeric_types,
compat_str, compat_str,
) )
@ -62,6 +66,10 @@ _NaN = float('nan')
_Infinity = float('inf') _Infinity = float('inf')
class JS_Undefined(object):
pass
def _js_bit_op(op): def _js_bit_op(op):
def zeroise(x): def zeroise(x):
@ -74,43 +82,114 @@ def _js_bit_op(op):
return wrapped return wrapped
def _js_arith_op(op): def _js_arith_op(op, div=False):
@wraps_op(op) @wraps_op(op)
def wrapped(a, b): def wrapped(a, b):
if JS_Undefined in (a, b): if JS_Undefined in (a, b):
return _NaN return _NaN
return op(a or 0, b or 0) # null, "" --> 0
a, b = (float_or_none(
(x.strip() if isinstance(x, compat_basestring) else x) or 0,
default=_NaN) for x in (a, b))
if _NaN in (a, b):
return _NaN
try:
return op(a, b)
except ZeroDivisionError:
return _NaN if not (div and (a or b)) else _Infinity
return wrapped return wrapped
def _js_div(a, b): _js_arith_add = _js_arith_op(operator.add)
if JS_Undefined in (a, b) or not (a or b):
return _NaN
return operator.truediv(a or 0, b) if b else _Infinity
def _js_mod(a, b): def _js_add(a, b):
if JS_Undefined in (a, b) or not b: if not (isinstance(a, compat_basestring) or isinstance(b, compat_basestring)):
return _NaN return _js_arith_add(a, b)
return (a or 0) % b if not isinstance(a, compat_basestring):
a = _js_toString(a)
elif not isinstance(b, compat_basestring):
b = _js_toString(b)
return operator.concat(a, b)
_js_mod = _js_arith_op(operator.mod)
__js_exp = _js_arith_op(operator.pow)
def _js_exp(a, b): def _js_exp(a, b):
if not b: if not b:
return 1 # even 0 ** 0 !! return 1 # even 0 ** 0 !!
elif JS_Undefined in (a, b): return __js_exp(a, b)
return _NaN
return (a or 0) ** b
def _js_eq_op(op): def _js_to_primitive(v):
return (
','.join(map(_js_toString, v)) if isinstance(v, list)
else '[object Object]' if isinstance(v, dict)
else compat_str(v) if not isinstance(v, (
compat_numeric_types, compat_basestring))
else v
)
def _js_toString(v):
return (
'undefined' if v is JS_Undefined
else 'Infinity' if v == _Infinity
else 'NaN' if v is _NaN
else 'null' if v is None
# bool <= int: do this first
else ('false', 'true')[v] if isinstance(v, bool)
else '{0:.7f}'.format(v).rstrip('.0') if isinstance(v, compat_numeric_types)
else _js_to_primitive(v))
_nullish = frozenset((None, JS_Undefined))
def _js_eq(a, b):
# NaN != any
if _NaN in (a, b):
return False
# Object is Object
if isinstance(a, type(b)) and isinstance(b, (dict, list)):
return operator.is_(a, b)
# general case
if a == b:
return True
# null == undefined
a_b = set((a, b))
if a_b & _nullish:
return a_b <= _nullish
a, b = _js_to_primitive(a), _js_to_primitive(b)
if not isinstance(a, compat_basestring):
a, b = b, a
# Number to String: convert the string to a number
# Conversion failure results in ... false
if isinstance(a, compat_basestring):
return float_or_none(a) == b
return a == b
def _js_neq(a, b):
return not _js_eq(a, b)
def _js_id_op(op):
@wraps_op(op) @wraps_op(op)
def wrapped(a, b): def wrapped(a, b):
if set((a, b)) <= set((None, JS_Undefined)): if _NaN in (a, b):
return op(a, a) return op(_NaN, None)
if not isinstance(a, (compat_basestring, compat_numeric_types)):
a, b = b, a
# strings are === if ==
# why 'a' is not 'a': https://stackoverflow.com/a/1504848
if isinstance(a, (compat_basestring, compat_numeric_types)):
return a == b if op(0, 0) else a != b
return op(a, b) return op(a, b)
return wrapped return wrapped
@ -138,25 +217,57 @@ def _js_ternary(cndn, if_true=True, if_false=False):
return if_true return if_true
def _js_unary_op(op):
@wraps_op(op)
def wrapped(_, a):
return op(a)
return wrapped
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/typeof
def _js_typeof(expr):
with compat_contextlib_suppress(TypeError, KeyError):
return {
JS_Undefined: 'undefined',
_NaN: 'number',
_Infinity: 'number',
True: 'boolean',
False: 'boolean',
None: 'object',
}[expr]
for t, n in (
(compat_basestring, 'string'),
(compat_numeric_types, 'number'),
):
if isinstance(expr, t):
return n
if callable(expr):
return 'function'
# TODO: Symbol, BigInt
return 'object'
# (op, definition) in order of binding priority, tightest first # (op, definition) in order of binding priority, tightest first
# avoid dict to maintain order # avoid dict to maintain order
# definition None => Defined in JSInterpreter._operator # definition None => Defined in JSInterpreter._operator
_OPERATORS = ( _OPERATORS = (
('>>', _js_bit_op(operator.rshift)), ('>>', _js_bit_op(operator.rshift)),
('<<', _js_bit_op(operator.lshift)), ('<<', _js_bit_op(operator.lshift)),
('+', _js_arith_op(operator.add)), ('+', _js_add),
('-', _js_arith_op(operator.sub)), ('-', _js_arith_op(operator.sub)),
('*', _js_arith_op(operator.mul)), ('*', _js_arith_op(operator.mul)),
('%', _js_mod), ('%', _js_mod),
('/', _js_div), ('/', _js_arith_op(operator.truediv, div=True)),
('**', _js_exp), ('**', _js_exp),
) )
_COMP_OPERATORS = ( _COMP_OPERATORS = (
('===', operator.is_), ('===', _js_id_op(operator.is_)),
('!==', operator.is_not), ('!==', _js_id_op(operator.is_not)),
('==', _js_eq_op(operator.eq)), ('==', _js_eq),
('!=', _js_eq_op(operator.ne)), ('!=', _js_neq),
('<=', _js_comp_op(operator.le)), ('<=', _js_comp_op(operator.le)),
('>=', _js_comp_op(operator.ge)), ('>=', _js_comp_op(operator.ge)),
('<', _js_comp_op(operator.lt)), ('<', _js_comp_op(operator.lt)),
@ -176,6 +287,11 @@ _SC_OPERATORS = (
('&&', None), ('&&', None),
) )
_UNARY_OPERATORS_X = (
('void', _js_unary_op(lambda _: JS_Undefined)),
('typeof', _js_unary_op(_js_typeof)),
)
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
_NAME_RE = r'[a-zA-Z_$][\w$]*' _NAME_RE = r'[a-zA-Z_$][\w$]*'
@ -183,10 +299,6 @@ _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
_QUOTES = '\'"/' _QUOTES = '\'"/'
class JS_Undefined(object):
pass
class JS_Break(ExtractorError): class JS_Break(ExtractorError):
def __init__(self): def __init__(self):
ExtractorError.__init__(self, 'Invalid break') ExtractorError.__init__(self, 'Invalid break')
@ -242,6 +354,7 @@ class Debugger(object):
@classmethod @classmethod
def wrap_interpreter(cls, f): def wrap_interpreter(cls, f):
@wraps(f)
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs): def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
if cls.ENABLED and stmt.strip(): if cls.ENABLED and stmt.strip():
cls.write(stmt, level=allow_recursion) cls.write(stmt, level=allow_recursion)
@ -255,7 +368,7 @@ class Debugger(object):
raise raise
if cls.ENABLED and stmt.strip(): if cls.ENABLED and stmt.strip():
if should_ret or repr(ret) != stmt: if should_ret or repr(ret) != stmt:
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion) cls.write(['->', '=>'][bool(should_ret)], repr(ret), '<-|', stmt, level=allow_recursion)
return ret, should_ret return ret, should_ret
return interpret_statement return interpret_statement
@ -284,6 +397,9 @@ class JSInterpreter(object):
RE_FLAGS = { RE_FLAGS = {
# special knowledge: Python's re flags are bitmask values, current max 128 # special knowledge: Python's re flags are bitmask values, current max 128
# invent new bitmask values well above that for literal parsing # invent new bitmask values well above that for literal parsing
# JS 'u' flag is effectively always set (surrogate pairs aren't seen),
# but \u{...} and \p{...} escapes aren't handled); no additional JS 'v'
# features are supported
# TODO: execute matches with these flags (remaining: d, y) # TODO: execute matches with these flags (remaining: d, y)
'd': 1024, # Generate indices for substring matches 'd': 1024, # Generate indices for substring matches
'g': 2048, # Global search 'g': 2048, # Global search
@ -291,6 +407,7 @@ class JSInterpreter(object):
'm': re.M, # Multi-line search 'm': re.M, # Multi-line search
's': re.S, # Allows . to match newline characters 's': re.S, # Allows . to match newline characters
'u': re.U, # Treat a pattern as a sequence of unicode code points 'u': re.U, # Treat a pattern as a sequence of unicode code points
'v': re.U, # Like 'u' with extended character class and \p{} syntax
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string 'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
} }
@ -347,6 +464,8 @@ class JSInterpreter(object):
def __op_chars(cls): def __op_chars(cls):
op_chars = set(';,[') op_chars = set(';,[')
for op in cls._all_operators(): for op in cls._all_operators():
if op[0].isalpha():
continue
op_chars.update(op[0]) op_chars.update(op[0])
return op_chars return op_chars
@ -369,9 +488,18 @@ class JSInterpreter(object):
skipping = 0 skipping = 0
if skip_delims: if skip_delims:
skip_delims = variadic(skip_delims) skip_delims = variadic(skip_delims)
skip_txt = None
for idx, char in enumerate(expr): for idx, char in enumerate(expr):
if skip_txt and idx <= skip_txt[1]:
continue
paren_delta = 0 paren_delta = 0
if not in_quote: if not in_quote:
if char == '/' and expr[idx:idx + 2] == '/*':
# skip a comment
skip_txt = expr[idx:].find('*/', 2)
skip_txt = [idx, idx + skip_txt + 1] if skip_txt >= 2 else None
if skip_txt:
continue
if char in _MATCHING_PARENS: if char in _MATCHING_PARENS:
counters[_MATCHING_PARENS[char]] += 1 counters[_MATCHING_PARENS[char]] += 1
paren_delta = 1 paren_delta = 1
@ -404,11 +532,18 @@ class JSInterpreter(object):
if pos < delim_len: if pos < delim_len:
pos += 1 pos += 1
continue continue
if skip_txt and skip_txt[0] >= start and skip_txt[1] <= idx - delim_len:
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1: idx - delim_len]
else:
yield expr[start: idx - delim_len] yield expr[start: idx - delim_len]
skip_txt = None
start, pos = idx + 1, 0 start, pos = idx + 1, 0
splits += 1 splits += 1
if max_split and splits >= max_split: if max_split and splits >= max_split:
break break
if skip_txt and skip_txt[0] >= start:
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1:]
else:
yield expr[start:] yield expr[start:]
@classmethod @classmethod
@ -425,7 +560,7 @@ class JSInterpreter(object):
if not _cached: if not _cached:
_cached.extend(itertools.chain( _cached.extend(itertools.chain(
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)) _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
return _cached return _cached
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
@ -449,13 +584,14 @@ class JSInterpreter(object):
except Exception as e: except Exception as e:
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e) raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
def _index(self, obj, idx, allow_undefined=False): def _index(self, obj, idx, allow_undefined=True):
if idx == 'length': if idx == 'length' and isinstance(obj, list):
return len(obj) return len(obj)
try: try:
return obj[int(idx)] if isinstance(obj, list) else obj[idx] return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
except Exception as e: except (TypeError, KeyError, IndexError) as e:
if allow_undefined: if allow_undefined:
# when is not allowed?
return JS_Undefined return JS_Undefined
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e) raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
@ -467,7 +603,7 @@ class JSInterpreter(object):
# used below # used below
_VAR_RET_THROW_RE = re.compile(r'''(?x) _VAR_RET_THROW_RE = re.compile(r'''(?x)
(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+) (?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
''') ''')
_COMPOUND_RE = re.compile(r'''(?x) _COMPOUND_RE = re.compile(r'''(?x)
(?P<try>try)\s*\{| (?P<try>try)\s*\{|
@ -479,6 +615,52 @@ class JSInterpreter(object):
_FINALLY_RE = re.compile(r'finally\s*\{') _FINALLY_RE = re.compile(r'finally\s*\{')
_SWITCH_RE = re.compile(r'switch\s*\(') _SWITCH_RE = re.compile(r'switch\s*\(')
def handle_operators(self, expr, local_vars, allow_recursion):
for op, _ in self._all_operators():
# hackety: </> have higher priority than <</>>, but don't confuse them
skip_delim = (op + op) if op in '<>*?' else None
if op == '?':
skip_delim = (skip_delim, '?.')
separated = list(self._separate(expr, op, skip_delims=skip_delim))
if len(separated) < 2:
continue
right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
separated = [s.strip() for s in separated]
while len(separated) > 1 and not separated[-1]:
undone += 1
separated.pop()
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
right_expr = separated.pop() + right_expr
if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1] if separated else ''
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr
if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None
break
if right_expr is None:
continue
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
@Debugger.wrap_interpreter @Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100): def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0: if allow_recursion < 0:
@ -501,7 +683,7 @@ class JSInterpreter(object):
expr = stmt[len(m.group(0)):].strip() expr = stmt[len(m.group(0)):].strip()
if m.group('throw'): if m.group('throw'):
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion)) raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
should_return = not m.group('var') should_return = 'return' if m.group('ret') else False
if not expr: if not expr:
return None, should_return return None, should_return
@ -533,9 +715,15 @@ class JSInterpreter(object):
else: else:
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr) raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
if expr.startswith('void '): for op, _ in _UNARY_OPERATORS_X:
left = self.interpret_expression(expr[5:], local_vars, allow_recursion) if not expr.startswith(op):
return None, should_return continue
operand = expr[len(op):]
if not operand or operand[0] != ' ':
continue
op_result = self.handle_operators(expr, local_vars, allow_recursion)
if op_result:
return op_result[0], should_return
if expr.startswith('{'): if expr.startswith('{'):
inner, outer = self._separate_at_paren(expr) inner, outer = self._separate_at_paren(expr)
@ -582,7 +770,7 @@ class JSInterpreter(object):
if_expr, expr = self._separate_at_paren(expr) if_expr, expr = self._separate_at_paren(expr)
else: else:
# may lose ... else ... because of ll.368-374 # may lose ... else ... because of ll.368-374
if_expr, expr = self._separate_at_paren(expr, delim=';') if_expr, expr = self._separate_at_paren(' %s;' % (expr,), delim=';')
else_expr = None else_expr = None
m = re.match(r'else\s*(?P<block>\{)?', expr) m = re.match(r'else\s*(?P<block>\{)?', expr)
if m: if m:
@ -720,7 +908,7 @@ class JSInterpreter(object):
start, end = m.span() start, end = m.span()
sign = m.group('pre_sign') or m.group('post_sign') sign = m.group('pre_sign') or m.group('post_sign')
ret = local_vars[var] ret = local_vars[var]
local_vars[var] += 1 if sign[0] == '+' else -1 local_vars[var] = _js_add(ret, 1 if sign[0] == '+' else -1)
if m.group('pre_sign'): if m.group('pre_sign'):
ret = local_vars[var] ret = local_vars[var]
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:] expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
@ -730,13 +918,13 @@ class JSInterpreter(object):
m = re.match(r'''(?x) m = re.match(r'''(?x)
(?P<assign> (?P<assign>
(?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s* (?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s*
(?P<op>{_OPERATOR_RE})? (?P<op>{_OPERATOR_RE})?
=(?!=)(?P<expr>.*)$ =(?!=)(?P<expr>.*)$
)|(?P<return> )|(?P<return>
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$ (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
)|(?P<indexing> )|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$ (?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
)|(?P<attribute> )|(?P<attribute>
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s* (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
)|(?P<function> )|(?P<function>
@ -746,19 +934,23 @@ class JSInterpreter(object):
if md.get('assign'): if md.get('assign'):
left_val = local_vars.get(m.group('out')) left_val = local_vars.get(m.group('out'))
if not m.group('index'): if not m.group('out_idx'):
local_vars[m.group('out')] = self._operator( local_vars[m.group('out')] = self._operator(
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion) m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
return local_vars[m.group('out')], should_return return local_vars[m.group('out')], should_return
elif left_val in (None, JS_Undefined): elif left_val in (None, JS_Undefined):
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr) raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) indexes = re.split(r'\]\s*\[', m.group('out_idx'))
if not isinstance(idx, (int, float)): for i, idx in enumerate(indexes, 1):
raise self.Exception('List index %s must be integer' % (idx, ), expr=expr) idx = self.interpret_expression(idx, local_vars, allow_recursion)
if i < len(indexes):
left_val = self._index(left_val, idx)
if isinstance(idx, float):
idx = int(idx) idx = int(idx)
left_val[idx] = self._operator( left_val[idx] = self._operator(
m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion) m.group('op'), self._index(left_val, idx) if m.group('op') else None,
m.group('expr'), expr, local_vars, allow_recursion)
return left_val[idx], should_return return left_val[idx], should_return
elif expr.isdigit(): elif expr.isdigit():
@ -776,63 +968,31 @@ class JSInterpreter(object):
return _Infinity, should_return return _Infinity, should_return
elif md.get('return'): elif md.get('return'):
return local_vars[m.group('name')], should_return ret = local_vars[m.group('name')]
# challenge may try to force returning the original value
# use an optional internal var to block this
if should_return == 'return':
if '_ytdl_do_not_return' not in local_vars:
return ret, True
return (ret, True) if ret != local_vars['_ytdl_do_not_return'] else (ret, False)
else:
return ret, should_return
try: with compat_contextlib_suppress(ValueError):
ret = json.loads(js_to_json(expr)) # strict=True) ret = json.loads(js_to_json(expr)) # strict=True)
if not md.get('attribute'): if not md.get('attribute'):
return ret, should_return return ret, should_return
except ValueError:
pass
if md.get('indexing'): if md.get('indexing'):
val = local_vars[m.group('in')] val = local_vars[m.group('in')]
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion) for idx in re.split(r'\]\s*\[', m.group('in_idx')):
return self._index(val, idx), should_return idx = self.interpret_expression(idx, local_vars, allow_recursion)
val = self._index(val, idx)
return val, should_return
for op, _ in self._all_operators(): op_result = self.handle_operators(expr, local_vars, allow_recursion)
# hackety: </> have higher priority than <</>>, but don't confuse them if op_result:
skip_delim = (op + op) if op in '<>*?' else None return op_result[0], should_return
if op == '?':
skip_delim = (skip_delim, '?.')
separated = list(self._separate(expr, op, skip_delims=skip_delim))
if len(separated) < 2:
continue
right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
separated = [s.strip() for s in separated]
while len(separated) > 1 and not separated[-1]:
undone += 1
separated.pop()
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
right_expr = separated.pop() + right_expr
if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1] if separated else ''
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr
if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None
break
if right_expr is None:
continue
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
if md.get('attribute'): if md.get('attribute'):
variable, member, nullish = m.group('var', 'member', 'nullish') variable, member, nullish = m.group('var', 'member', 'nullish')
@ -877,7 +1037,7 @@ class JSInterpreter(object):
# Member access # Member access
if arg_str is None: if arg_str is None:
return self._index(obj, member, nullish) return self._index(obj, member)
# Function call # Function call
argvals = [ argvals = [
@ -904,7 +1064,7 @@ class JSInterpreter(object):
if obj is compat_str: if obj is compat_str:
if member == 'fromCharCode': if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')
return ''.join(map(compat_chr, argvals)) return ''.join(compat_chr(int(n)) for n in argvals)
raise self.Exception('Unsupported string method ' + member, expr=expr) raise self.Exception('Unsupported string method ' + member, expr=expr)
elif obj is float: elif obj is float:
if member == 'pow': if member == 'pow':
@ -913,13 +1073,47 @@ class JSInterpreter(object):
raise self.Exception('Unsupported Math method ' + member, expr=expr) raise self.Exception('Unsupported Math method ' + member, expr=expr)
if member == 'split': if member == 'split':
assertion(argvals, 'takes one or more arguments') assertion(len(argvals) <= 2, 'takes at most two arguments')
assertion(len(argvals) == 1, 'with limit argument is not implemented') if len(argvals) > 1:
return obj.split(argvals[0]) if argvals[0] else list(obj) limit = argvals[1]
assertion(isinstance(limit, int) and limit >= 0, 'integer limit >= 0')
if limit == 0:
return []
else:
limit = 0
if len(argvals) == 0:
argvals = [JS_Undefined]
elif isinstance(argvals[0], self.JS_RegExp):
# avoid re.split(), similar but not enough
def where():
for m in argvals[0].finditer(obj):
yield m.span(0)
yield (None, None)
def splits(limit=limit):
i = 0
for j, jj in where():
if j == jj == 0:
continue
if j is None and i >= len(obj):
break
yield obj[i:j]
if jj is None or limit == 1:
break
limit -= 1
i = jj
return list(splits())
return (
obj.split(argvals[0], limit - 1) if argvals[0] and argvals[0] != JS_Undefined
else list(obj)[:limit or None])
elif member == 'join': elif member == 'join':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(len(argvals) == 1, 'takes exactly one argument') assertion(len(argvals) <= 1, 'takes at most one argument')
return argvals[0].join(obj) return (',' if len(argvals) == 0 else argvals[0]).join(
('' if x in (None, JS_Undefined) else _js_toString(x))
for x in obj)
elif member == 'reverse': elif member == 'reverse':
assertion(not argvals, 'does not take any arguments') assertion(not argvals, 'does not take any arguments')
obj.reverse() obj.reverse()
@ -941,37 +1135,31 @@ class JSInterpreter(object):
index, how_many = map(int, (argvals + [len(obj)])[:2]) index, how_many = map(int, (argvals + [len(obj)])[:2])
if index < 0: if index < 0:
index += len(obj) index += len(obj)
add_items = argvals[2:] res = [obj.pop(index)
res = [] for _ in range(index, min(index + how_many, len(obj)))]
for _ in range(index, min(index + how_many, len(obj))): obj[index:index] = argvals[2:]
res.append(obj.pop(index))
for i, item in enumerate(add_items):
obj.insert(index + i, item)
return res return res
elif member == 'unshift': elif member in ('shift', 'pop'):
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments')
for item in reversed(argvals):
obj.insert(0, item)
return obj
elif member == 'pop':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(not argvals, 'does not take any arguments') assertion(not argvals, 'does not take any arguments')
if not obj: return obj.pop(0 if member == 'shift' else -1) if len(obj) > 0 else JS_Undefined
return elif member == 'unshift':
return obj.pop() assertion(isinstance(obj, list), 'must be applied on a list')
# not enforced: assertion(argvals, 'takes one or more arguments')
obj[0:0] = argvals
return len(obj)
elif member == 'push': elif member == 'push':
assertion(argvals, 'takes one or more arguments') # not enforced: assertion(argvals, 'takes one or more arguments')
obj.extend(argvals) obj.extend(argvals)
return obj return len(obj)
elif member == 'forEach': elif member == 'forEach':
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')
assertion(len(argvals) <= 2, 'takes at-most 2 arguments') assertion(len(argvals) <= 2, 'takes at most 2 arguments')
f, this = (argvals + [''])[:2] f, this = (argvals + [''])[:2]
return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)] return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
elif member == 'indexOf': elif member == 'indexOf':
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')
assertion(len(argvals) <= 2, 'takes at-most 2 arguments') assertion(len(argvals) <= 2, 'takes at most 2 arguments')
idx, start = (argvals + [0])[:2] idx, start = (argvals + [0])[:2]
try: try:
return obj.index(idx, start) return obj.index(idx, start)
@ -980,7 +1168,7 @@ class JSInterpreter(object):
elif member == 'charCodeAt': elif member == 'charCodeAt':
assertion(isinstance(obj, compat_str), 'must be applied on a string') assertion(isinstance(obj, compat_str), 'must be applied on a string')
# assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
idx = argvals[0] if isinstance(argvals[0], int) else 0 idx = argvals[0] if len(argvals) > 0 and isinstance(argvals[0], int) else 0
if idx >= len(obj): if idx >= len(obj):
return None return None
return ord(obj[idx]) return ord(obj[idx])
@ -1031,7 +1219,7 @@ class JSInterpreter(object):
yield self.interpret_expression(v, local_vars, allow_recursion) yield self.interpret_expression(v, local_vars, allow_recursion)
def extract_object(self, objname): def extract_object(self, objname):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' _FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
obj = {} obj = {}
fields = next(filter(None, ( fields = next(filter(None, (
obj_m.group('fields') for obj_m in re.finditer( obj_m.group('fields') for obj_m in re.finditer(
@ -1090,6 +1278,7 @@ class JSInterpreter(object):
def extract_function_from_code(self, argnames, code, *global_stack): def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {} local_vars = {}
while True: while True:
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code) mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
if mobj is None: if mobj is None:
@ -1100,10 +1289,11 @@ class JSInterpreter(object):
[x.strip() for x in mobj.group('args').split(',')], [x.strip() for x in mobj.group('args').split(',')],
body, local_vars, *global_stack)) body, local_vars, *global_stack))
code = code[:start] + name + remaining code = code[:start] + name + remaining
return self.build_function(argnames, code, local_vars, *global_stack) return self.build_function(argnames, code, local_vars, *global_stack)
def call_function(self, funcname, *args): def call_function(self, funcname, *args, **kw_global_vars):
return self.extract_function(funcname)(args) return self.extract_function(funcname)(args, kw_global_vars)
@classmethod @classmethod
def build_arglist(cls, arg_text): def build_arglist(cls, arg_text):
@ -1122,8 +1312,9 @@ class JSInterpreter(object):
global_stack = list(global_stack) or [{}] global_stack = list(global_stack) or [{}]
argnames = tuple(argnames) argnames = tuple(argnames)
def resf(args, kwargs={}, allow_recursion=100): def resf(args, kwargs=None, allow_recursion=100):
global_stack[0].update(zip_longest(argnames, args, fillvalue=None)) kwargs = kwargs or {}
global_stack[0].update(zip_longest(argnames, args, fillvalue=JS_Undefined))
global_stack[0].update(kwargs) global_stack[0].update(kwargs)
var_stack = LocalNameSpace(*global_stack) var_stack = LocalNameSpace(*global_stack)
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1) ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)