1
0
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-12-16 14:36:47 +00:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Remita Amine
082da36416 [nrk] reduce requests for Radio series 2020-12-05 16:24:49 +01:00
Remita Amine
6bf95b15ee [nrk] reduce the number of instalments requests 2020-12-05 15:35:29 +01:00
Remita Amine
4c93b2fd15 [nrk] improve format extraction 2020-12-05 09:13:42 +01:00

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools import itertools
import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -22,13 +23,26 @@ from ..utils import (
class NRKBaseIE(InfoExtractor): class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
_CDN_REPL_REGEX = r'''(?x)://
(?:
nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
nrk-od-no\.telenorcdn\.net|
minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
)/'''
def _extract_nrk_formats(self, asset_url, video_id): def _extract_nrk_formats(self, asset_url, video_id):
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url): if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
return self._extract_akamai_formats( return self._extract_akamai_formats(
re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id) re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id)
return self._extract_m3u8_formats( asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url), formats = self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
formats = self._extract_m3u8_formats(
re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
video_id, 'mp4', 'm3u8_native', fatal=False) video_id, 'mp4', 'm3u8_native', fatal=False)
return formats
def _raise_error(self, data): def _raise_error(self, data):
MESSAGES = { MESSAGES = {
@ -107,8 +121,11 @@ class NRKIE(NRKBaseIE):
def _extract_from_playback(self, video_id): def _extract_from_playback(self, video_id):
path_templ = 'playback/%s/' + video_id path_templ = 'playback/%s/' + video_id
call_playback_api = lambda x: self._call_api(path_templ % x, video_id, x)
manifest = call_playback_api('manifest') def call_playback_api(item, query=None):
return self._call_api(path_templ % item, video_id, item, query=query)
# known values for preferredCdn: akamai, iponly, minicdn and telenor
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
if manifest.get('playability') == 'nonPlayable': if manifest.get('playability') == 'nonPlayable':
self._raise_error(manifest['nonPlayable']) self._raise_error(manifest['nonPlayable'])
@ -195,7 +212,6 @@ class NRKTVIE(NRKBaseIE):
'series': '20 spørsmål', 'series': '20 spørsmål',
'episode': '23.05.2014', 'episode': '23.05.2014',
}, },
'skip': 'NoProgramRights',
}, { }, {
'url': 'https://tv.nrk.no/program/mdfp15000514', 'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': { 'info_dict': {
@ -214,15 +230,15 @@ class NRKTVIE(NRKBaseIE):
# single playlist video # single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'info_dict': { 'info_dict': {
'id': 'MSPO40010515-part2', 'id': 'MSPO40010515AH',
'ext': 'flv', 'ext': 'mp4',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 'description': 'md5:c03aba1e917561eface5214020551b7a',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Video is geo restricted'], 'expected_warnings': ['Failed to download m3u8 information'],
'skip': 'particular part is not supported currently', 'skip': 'particular part is not supported currently',
}, { }, {
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
@ -232,7 +248,7 @@ class NRKTVIE(NRKBaseIE):
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:c03aba1e917561eface5214020551b7a', 'description': 'md5:c03aba1e917561eface5214020551b7a',
}, },
'skip': 'Video is geo restricted', 'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13', 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
'info_dict': { 'info_dict': {
@ -312,6 +328,7 @@ class NRKTVIE(NRKBaseIE):
asset_url = asset.get('url') asset_url = asset.get('url')
if not asset_url or asset_url in urls: if not asset_url or asset_url in urls:
continue continue
urls.append(asset_url)
formats = self._extract_nrk_formats(asset_url, video_id) formats = self._extract_nrk_formats(asset_url, video_id)
if not formats: if not formats:
continue continue
@ -677,10 +694,13 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
site, series_id = re.match(self._VALID_URL, url).groups() site, series_id = re.match(self._VALID_URL, url).groups()
domain = 'radio' if site == 'radio.nrk' else 'tv' is_radio = site == 'radio.nrk'
domain = 'radio' if is_radio else 'tv'
size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
series = self._call_api( series = self._call_api(
'%s/catalog/series/%s' % (domain, series_id), series_id, 'serie') '%s/catalog/series/%s' % (domain, series_id),
series_id, 'serie', query={size_prefix + 'ageSize': 50})
titles = try_get(series, [ titles = try_get(series, [
lambda x: x['titles'], lambda x: x['titles'],
lambda x: x[x['type']]['titles'], lambda x: x[x['type']]['titles'],