mirror of
https://github.com/ytdl-org/youtube-dl
synced 2024-12-16 06:27:00 +00:00
Compare commits
8 Commits
9f17a6dff6
...
87ba614c4c
Author | SHA1 | Date | |
---|---|---|---|
|
87ba614c4c | ||
|
3d649843fe | ||
|
9bc8fcf23c | ||
|
a93fff3052 | ||
|
2968201ae0 | ||
|
067ad7439c | ||
|
7ab85d88d6 | ||
|
4bf85ca5ba |
@ -160,7 +160,6 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
||||
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
||||
|
||||
@unittest.skip('Not yet fully implemented')
|
||||
def test_comments(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@ -179,6 +178,15 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}
|
||||
''', 3)
|
||||
|
||||
self._test('''
|
||||
function f() {
|
||||
var x = ( /* 1 + */ 2 +
|
||||
/* 30 * 40 */
|
||||
50);
|
||||
return x;
|
||||
}
|
||||
''', 52)
|
||||
|
||||
def test_precedence(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@ -483,6 +491,13 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||
self._test(jsi, '', args=[[], '-'])
|
||||
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
|
||||
'1,1,abc,[object Object],,,Infinity,NaN')
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
|
||||
'1~1~abc~[object Object]~~~Infinity~NaN')
|
||||
|
||||
def test_split(self):
|
||||
test_result = list('test')
|
||||
tests = [
|
||||
@ -496,6 +511,18 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||
self._test(jsi, [''], args=['', '-'])
|
||||
self._test(jsi, [], args=['', ''])
|
||||
# RegExp split
|
||||
self._test('function f(){return "test".split(/(?:)/)}',
|
||||
['t', 'e', 's', 't'])
|
||||
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
|
||||
['t', 't'])
|
||||
# from MDN: surrogate pairs aren't handled: case 1 fails
|
||||
# self._test('function f(){return "😄😄".split(/(?:)/)}',
|
||||
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
|
||||
# case 2 beats Py3.2: it gets the case 1 result
|
||||
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
|
||||
self._test('function f(){return "😄😄".split(/(?:)/u)}',
|
||||
['😄', '😄'])
|
||||
|
||||
def test_slice(self):
|
||||
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||
@ -525,6 +552,40 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||
|
||||
def test_pop(self):
|
||||
# pop
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
|
||||
[8, [0, 1, 2, 3, 4, 5, 6, 7]])
|
||||
self._test('function f(){return [].pop()}', JS_Undefined)
|
||||
# push
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(3, 4), a]}',
|
||||
[5, [0, 1, 2, 3, 4]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_shift(self):
|
||||
# shift
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.shift(), a]}',
|
||||
[0, [1, 2, 3, 4, 5, 6, 7, 8]])
|
||||
self._test('function f(){return [].shift()}', JS_Undefined)
|
||||
# unshift
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(3, 4), a]}',
|
||||
[5, [3, 4, 0, 1, 2]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_forEach(self):
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){ret.push([e,i,a]);}; '
|
||||
'l.forEach(log); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){this.push([e,i,a]);}; '
|
||||
'l.forEach(log, ret); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -12,6 +13,7 @@ import re
|
||||
import string
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_contextlib_suppress,
|
||||
compat_open as open,
|
||||
compat_str,
|
||||
compat_urlretrieve,
|
||||
@ -50,23 +52,33 @@ _SIG_TESTS = [
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
84,
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
83,
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||
)
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
@ -142,6 +154,10 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
||||
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
|
||||
'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||
@ -154,6 +170,10 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
||||
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
|
||||
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||
@ -182,6 +202,14 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
|
||||
'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@ -216,11 +244,9 @@ class TestSignature(unittest.TestCase):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
with compat_contextlib_suppress(OSError):
|
||||
for f in os.listdir(self.TESTDATA_DIR):
|
||||
os.remove(f)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def t_factory(name, sig_func, url_pattern):
|
||||
@ -254,11 +280,12 @@ def signature(jscode, sig_input):
|
||||
|
||||
def n_sig(jscode, sig_input):
|
||||
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
|
||||
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
||||
return JSInterpreter(jscode).call_function(
|
||||
funcname, sig_input, _ytdl_do_not_return=sig_input)
|
||||
|
||||
|
||||
make_sig_test = t_factory(
|
||||
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
|
||||
'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
|
||||
for test_spec in _SIG_TESTS:
|
||||
make_sig_test(*test_spec)
|
||||
|
||||
|
@ -1579,19 +1579,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self.to_screen('Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
# Examples where `sig` is funcname:
|
||||
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
||||
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
|
||||
# {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
|
||||
# sig=function(J){J=J.split(""); ... ;return J.join("")};
|
||||
# ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
|
||||
# {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
|
||||
funcname = self._search_regex(
|
||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
(r'\b(?P<var>[a-zA-Z0-9$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)',
|
||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
|
||||
# Old patterns
|
||||
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||||
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
|
||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
# Obsolete patterns
|
||||
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||
jscode, 'Initial JS player signature function name', group='sig')
|
||||
|
||||
@ -1658,36 +1665,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
func_name, idx = self._search_regex(
|
||||
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
||||
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
|
||||
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
|
||||
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
|
||||
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
||||
# or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
|
||||
# or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
|
||||
# or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
|
||||
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||||
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||
# old: (b=a.get("n"))&&(b=narray[idx](b)
|
||||
# older: (b=a.get("n"))&&(b=nfunc(b)
|
||||
r'''(?x)
|
||||
\((?:[\w$()\s]+,)*?\s* # (
|
||||
(?P<b>[a-z])\s*=\s* # b=
|
||||
(?:
|
||||
(?: # expect ,c=a.get(b) (etc)
|
||||
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
|
||||
"n+"\[\s*\+?s*[\w$.]+\s*]
|
||||
)\s*(?:,[\w$()\s]+(?=,))*|
|
||||
(?P<old>[\w$]+) # a (old[er])
|
||||
)\s*
|
||||
(?(old)
|
||||
# b.get("n")
|
||||
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
|
||||
| # ,c=a.get(b)
|
||||
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
|
||||
)
|
||||
# interstitial junk
|
||||
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
|
||||
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
|
||||
# nfunc|nfunc[idx]
|
||||
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
|
||||
# (expr, ...,
|
||||
\((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)*
|
||||
# b=...
|
||||
(?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?:
|
||||
\.\s*[\w$]+ |
|
||||
\[\s*[\w$]+\s*\] |
|
||||
\.\s*get\s*\(\s*[\w$"]+\s*\)
|
||||
)\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s*
|
||||
\)\s*&&\s*\( # ...)&&(
|
||||
# b = nfunc, b = narray[idx]
|
||||
(?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s*
|
||||
(?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)?
|
||||
# (...)
|
||||
\(\s*[\w$]+\s*\)
|
||||
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
||||
default=(None, None))
|
||||
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
||||
@ -1697,15 +1697,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
r'''(?xs)
|
||||
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
||||
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
||||
\s*\{(?:(?!};).)+?["']enhanced_except_
|
||||
\s*\{(?:(?!};).)+?(?:
|
||||
["']enhanced_except_ |
|
||||
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+
|
||||
)
|
||||
''', jscode, 'Initial JS player n function name', group='name')
|
||||
if not idx:
|
||||
return func_name
|
||||
|
||||
return self._parse_json(self._search_regex(
|
||||
r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
|
||||
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
|
||||
func_name, transform_source=js_to_json)[int(idx)]
|
||||
return self._search_json(
|
||||
r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
|
||||
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
|
||||
func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
|
||||
transform_source=js_to_json)[int(idx)]
|
||||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
@ -1728,13 +1732,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def extract_nsig(s):
|
||||
try:
|
||||
ret = func([s])
|
||||
ret = func([s], kwargs={'_ytdl_do_not_return': s})
|
||||
except JSInterpreter.Exception:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
|
||||
|
||||
if ret.startswith('enhanced_except_'):
|
||||
if ret.startswith('enhanced_except_') or ret.endswith(s):
|
||||
raise JSInterpreter.Exception('Signature function returned an exception')
|
||||
return ret
|
||||
|
||||
|
@ -368,7 +368,7 @@ class Debugger(object):
|
||||
raise
|
||||
if cls.ENABLED and stmt.strip():
|
||||
if should_ret or repr(ret) != stmt:
|
||||
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
cls.write(['->', '=>'][bool(should_ret)], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
return ret, should_ret
|
||||
return interpret_statement
|
||||
|
||||
@ -397,6 +397,9 @@ class JSInterpreter(object):
|
||||
RE_FLAGS = {
|
||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||
# invent new bitmask values well above that for literal parsing
|
||||
# JS 'u' flag is effectively always set (surrogate pairs aren't seen),
|
||||
# but \u{...} and \p{...} escapes aren't handled; no additional JS 'v'
|
||||
# features are supported
|
||||
# TODO: execute matches with these flags (remaining: d, y)
|
||||
'd': 1024, # Generate indices for substring matches
|
||||
'g': 2048, # Global search
|
||||
@ -404,6 +407,7 @@ class JSInterpreter(object):
|
||||
'm': re.M, # Multi-line search
|
||||
's': re.S, # Allows . to match newline characters
|
||||
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
||||
'v': re.U, # Like 'u' with extended character class and \p{} syntax
|
||||
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
||||
}
|
||||
|
||||
@ -484,9 +488,18 @@ class JSInterpreter(object):
|
||||
skipping = 0
|
||||
if skip_delims:
|
||||
skip_delims = variadic(skip_delims)
|
||||
skip_txt = None
|
||||
for idx, char in enumerate(expr):
|
||||
if skip_txt and idx <= skip_txt[1]:
|
||||
continue
|
||||
paren_delta = 0
|
||||
if not in_quote:
|
||||
if char == '/' and expr[idx:idx + 2] == '/*':
|
||||
# skip a comment
|
||||
skip_txt = expr[idx:].find('*/', 2)
|
||||
skip_txt = [idx, idx + skip_txt + 1] if skip_txt >= 2 else None
|
||||
if skip_txt:
|
||||
continue
|
||||
if char in _MATCHING_PARENS:
|
||||
counters[_MATCHING_PARENS[char]] += 1
|
||||
paren_delta = 1
|
||||
@ -519,11 +532,18 @@ class JSInterpreter(object):
|
||||
if pos < delim_len:
|
||||
pos += 1
|
||||
continue
|
||||
if skip_txt and skip_txt[0] >= start and skip_txt[1] <= idx - delim_len:
|
||||
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1: idx - delim_len]
|
||||
else:
|
||||
yield expr[start: idx - delim_len]
|
||||
skip_txt = None
|
||||
start, pos = idx + 1, 0
|
||||
splits += 1
|
||||
if max_split and splits >= max_split:
|
||||
break
|
||||
if skip_txt and skip_txt[0] >= start:
|
||||
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1:]
|
||||
else:
|
||||
yield expr[start:]
|
||||
|
||||
@classmethod
|
||||
@ -583,7 +603,7 @@ class JSInterpreter(object):
|
||||
|
||||
# used below
|
||||
_VAR_RET_THROW_RE = re.compile(r'''(?x)
|
||||
(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+)
|
||||
(?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
|
||||
''')
|
||||
_COMPOUND_RE = re.compile(r'''(?x)
|
||||
(?P<try>try)\s*\{|
|
||||
@ -663,7 +683,7 @@ class JSInterpreter(object):
|
||||
expr = stmt[len(m.group(0)):].strip()
|
||||
if m.group('throw'):
|
||||
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
|
||||
should_return = not m.group('var')
|
||||
should_return = 'return' if m.group('ret') else False
|
||||
if not expr:
|
||||
return None, should_return
|
||||
|
||||
@ -948,14 +968,20 @@ class JSInterpreter(object):
|
||||
return _Infinity, should_return
|
||||
|
||||
elif md.get('return'):
|
||||
return local_vars[m.group('name')], should_return
|
||||
ret = local_vars[m.group('name')]
|
||||
# challenge may try to force returning the original value
|
||||
# use an optional internal var to block this
|
||||
if should_return == 'return':
|
||||
if '_ytdl_do_not_return' not in local_vars:
|
||||
return ret, True
|
||||
return (ret, True) if ret != local_vars['_ytdl_do_not_return'] else (ret, False)
|
||||
else:
|
||||
return ret, should_return
|
||||
|
||||
try:
|
||||
with compat_contextlib_suppress(ValueError):
|
||||
ret = json.loads(js_to_json(expr)) # strict=True)
|
||||
if not md.get('attribute'):
|
||||
return ret, should_return
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if md.get('indexing'):
|
||||
val = local_vars[m.group('in')]
|
||||
@ -1047,13 +1073,47 @@ class JSInterpreter(object):
|
||||
raise self.Exception('Unsupported Math method ' + member, expr=expr)
|
||||
|
||||
if member == 'split':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) == 1, 'with limit argument is not implemented')
|
||||
return obj.split(argvals[0]) if argvals[0] else list(obj)
|
||||
assertion(len(argvals) <= 2, 'takes at most two arguments')
|
||||
if len(argvals) > 1:
|
||||
limit = argvals[1]
|
||||
assertion(isinstance(limit, int) and limit >= 0, 'integer limit >= 0')
|
||||
if limit == 0:
|
||||
return []
|
||||
else:
|
||||
limit = 0
|
||||
if len(argvals) == 0:
|
||||
argvals = [JS_Undefined]
|
||||
elif isinstance(argvals[0], self.JS_RegExp):
|
||||
# avoid re.split(), similar but not enough
|
||||
|
||||
def where():
|
||||
for m in argvals[0].finditer(obj):
|
||||
yield m.span(0)
|
||||
yield (None, None)
|
||||
|
||||
def splits(limit=limit):
|
||||
i = 0
|
||||
for j, jj in where():
|
||||
if j == jj == 0:
|
||||
continue
|
||||
if j is None and i >= len(obj):
|
||||
break
|
||||
yield obj[i:j]
|
||||
if jj is None or limit == 1:
|
||||
break
|
||||
limit -= 1
|
||||
i = jj
|
||||
|
||||
return list(splits())
|
||||
return (
|
||||
obj.split(argvals[0], limit - 1) if argvals[0] and argvals[0] != JS_Undefined
|
||||
else list(obj)[:limit or None])
|
||||
elif member == 'join':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||
return argvals[0].join(obj)
|
||||
assertion(len(argvals) <= 1, 'takes at most one argument')
|
||||
return (',' if len(argvals) == 0 else argvals[0]).join(
|
||||
('' if x in (None, JS_Undefined) else _js_toString(x))
|
||||
for x in obj)
|
||||
elif member == 'reverse':
|
||||
assertion(not argvals, 'does not take any arguments')
|
||||
obj.reverse()
|
||||
@ -1075,37 +1135,31 @@ class JSInterpreter(object):
|
||||
index, how_many = map(int, (argvals + [len(obj)])[:2])
|
||||
if index < 0:
|
||||
index += len(obj)
|
||||
add_items = argvals[2:]
|
||||
res = []
|
||||
for _ in range(index, min(index + how_many, len(obj))):
|
||||
res.append(obj.pop(index))
|
||||
for i, item in enumerate(add_items):
|
||||
obj.insert(index + i, item)
|
||||
res = [obj.pop(index)
|
||||
for _ in range(index, min(index + how_many, len(obj)))]
|
||||
obj[index:index] = argvals[2:]
|
||||
return res
|
||||
elif member == 'unshift':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
for item in reversed(argvals):
|
||||
obj.insert(0, item)
|
||||
return obj
|
||||
elif member == 'pop':
|
||||
elif member in ('shift', 'pop'):
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(not argvals, 'does not take any arguments')
|
||||
if not obj:
|
||||
return
|
||||
return obj.pop()
|
||||
return obj.pop(0 if member == 'shift' else -1) if len(obj) > 0 else JS_Undefined
|
||||
elif member == 'unshift':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
# not enforced: assertion(argvals, 'takes one or more arguments')
|
||||
obj[0:0] = argvals
|
||||
return len(obj)
|
||||
elif member == 'push':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
# not enforced: assertion(argvals, 'takes one or more arguments')
|
||||
obj.extend(argvals)
|
||||
return obj
|
||||
return len(obj)
|
||||
elif member == 'forEach':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at most 2 arguments')
|
||||
f, this = (argvals + [''])[:2]
|
||||
return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
|
||||
elif member == 'indexOf':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at most 2 arguments')
|
||||
idx, start = (argvals + [0])[:2]
|
||||
try:
|
||||
return obj.index(idx, start)
|
||||
@ -1114,7 +1168,7 @@ class JSInterpreter(object):
|
||||
elif member == 'charCodeAt':
|
||||
assertion(isinstance(obj, compat_str), 'must be applied on a string')
|
||||
# assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
|
||||
idx = argvals[0] if isinstance(argvals[0], int) else 0
|
||||
idx = argvals[0] if len(argvals) > 0 and isinstance(argvals[0], int) else 0
|
||||
if idx >= len(obj):
|
||||
return None
|
||||
return ord(obj[idx])
|
||||
@ -1165,7 +1219,7 @@ class JSInterpreter(object):
|
||||
yield self.interpret_expression(v, local_vars, allow_recursion)
|
||||
|
||||
def extract_object(self, objname):
|
||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
|
||||
obj = {}
|
||||
fields = next(filter(None, (
|
||||
obj_m.group('fields') for obj_m in re.finditer(
|
||||
@ -1224,6 +1278,7 @@ class JSInterpreter(object):
|
||||
|
||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||
local_vars = {}
|
||||
|
||||
while True:
|
||||
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
|
||||
if mobj is None:
|
||||
@ -1234,10 +1289,11 @@ class JSInterpreter(object):
|
||||
[x.strip() for x in mobj.group('args').split(',')],
|
||||
body, local_vars, *global_stack))
|
||||
code = code[:start] + name + remaining
|
||||
|
||||
return self.build_function(argnames, code, local_vars, *global_stack)
|
||||
|
||||
def call_function(self, funcname, *args):
|
||||
return self.extract_function(funcname)(args)
|
||||
def call_function(self, funcname, *args, **kw_global_vars):
|
||||
return self.extract_function(funcname)(args, kw_global_vars)
|
||||
|
||||
@classmethod
|
||||
def build_arglist(cls, arg_text):
|
||||
@ -1256,8 +1312,9 @@ class JSInterpreter(object):
|
||||
global_stack = list(global_stack) or [{}]
|
||||
argnames = tuple(argnames)
|
||||
|
||||
def resf(args, kwargs={}, allow_recursion=100):
|
||||
global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
|
||||
def resf(args, kwargs=None, allow_recursion=100):
|
||||
kwargs = kwargs or {}
|
||||
global_stack[0].update(zip_longest(argnames, args, fillvalue=JS_Undefined))
|
||||
global_stack[0].update(kwargs)
|
||||
var_stack = LocalNameSpace(*global_stack)
|
||||
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||
|
Loading…
Reference in New Issue
Block a user