mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 17:53:11 +00:00 
			
		
		
		
	[rtve] improve extraction
- extract all formats - fix RTVE Infantil extraction (closes #24851) - extract is_live and series
This commit is contained in:
		
							parent
							
								
									ebfd66c4b1
								
							
						
					
					
						commit
						9955bb4a27
					
				| @ -2,8 +2,9 @@ | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import base64 | ||||
| import io | ||||
| import re | ||||
| import time | ||||
| import sys | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
| @ -14,56 +15,13 @@ from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     qualities, | ||||
|     remove_end, | ||||
|     remove_start, | ||||
|     sanitized_Request, | ||||
|     std_headers, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| def _decrypt_url(png): | ||||
|     encrypted_data = compat_b64decode(png) | ||||
|     text_index = encrypted_data.find(b'tEXt') | ||||
|     text_chunk = encrypted_data[text_index - 4:] | ||||
|     length = compat_struct_unpack('!I', text_chunk[:4])[0] | ||||
|     # Use bytearray to get integers when iterating in both python 2.x and 3.x | ||||
|     data = bytearray(text_chunk[8:8 + length]) | ||||
|     data = [chr(b) for b in data if b != 0] | ||||
|     hash_index = data.index('#') | ||||
|     alphabet_data = data[:hash_index] | ||||
|     url_data = data[hash_index + 1:] | ||||
|     if url_data[0] == 'H' and url_data[3] == '%': | ||||
|         # remove useless HQ%% at the start | ||||
|         url_data = url_data[4:] | ||||
| 
 | ||||
|     alphabet = [] | ||||
|     e = 0 | ||||
|     d = 0 | ||||
|     for l in alphabet_data: | ||||
|         if d == 0: | ||||
|             alphabet.append(l) | ||||
|             d = e = (e + 1) % 4 | ||||
|         else: | ||||
|             d -= 1 | ||||
|     url = '' | ||||
|     f = 0 | ||||
|     e = 3 | ||||
|     b = 1 | ||||
|     for letter in url_data: | ||||
|         if f == 0: | ||||
|             l = int(letter) * 10 | ||||
|             f = 1 | ||||
|         else: | ||||
|             if e == 0: | ||||
|                 l += int(letter) | ||||
|                 url += alphabet[l] | ||||
|                 e = (b + 3) % 4 | ||||
|                 f = 0 | ||||
|                 b += 1 | ||||
|             else: | ||||
|                 e -= 1 | ||||
| 
 | ||||
|     return url | ||||
| _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x)) | ||||
| 
 | ||||
| 
 | ||||
| class RTVEALaCartaIE(InfoExtractor): | ||||
| @ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', | ||||
|             'duration': 5024.566, | ||||
|             'series': 'Balonmano', | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], | ||||
|     }, { | ||||
|         'note': 'Live stream', | ||||
|         'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', | ||||
|         'info_dict': { | ||||
|             'id': '1694255', | ||||
|             'ext': 'flv', | ||||
|             'title': 'TODO', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'live stream', | ||||
|         }, | ||||
|         'skip': 'The f4m manifest can\'t be used yet', | ||||
|     }, { | ||||
|         'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', | ||||
|         'md5': 'e55e162379ad587e9640eda4f7353c0f', | ||||
|         'md5': 'd850f3c8731ea53952ebab489cf81cbf', | ||||
|         'info_dict': { | ||||
|             'id': '4236788', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Servir y proteger - Capítulo 104 ', | ||||
|             'title': 'Servir y proteger - Capítulo 104', | ||||
|             'duration': 3222.0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # requires ffmpeg | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], | ||||
|     }, { | ||||
|         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', | ||||
|         'only_matching': True, | ||||
| @ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
| 
 | ||||
|     def _real_initialize(self): | ||||
|         user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8') | ||||
|         manager_info = self._download_json( | ||||
|         self._manager = self._download_json( | ||||
|             'http://www.rtve.es/odin/loki/' + user_agent_b64, | ||||
|             None, 'Fetching manager info') | ||||
|         self._manager = manager_info['manager'] | ||||
|             None, 'Fetching manager info')['manager'] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _decrypt_url(png): | ||||
|         encrypted_data = io.BytesIO(compat_b64decode(png)[8:]) | ||||
|         while True: | ||||
|             length = compat_struct_unpack('!I', encrypted_data.read(4))[0] | ||||
|             chunk_type = encrypted_data.read(4) | ||||
|             if chunk_type == b'IEND': | ||||
|                 break | ||||
|             data = encrypted_data.read(length) | ||||
|             if chunk_type == b'tEXt': | ||||
|                 alphabet_data, text = data.split(b'\0') | ||||
|                 quality, url_data = text.split(b'%%') | ||||
|                 alphabet = [] | ||||
|                 e = 0 | ||||
|                 d = 0 | ||||
|                 for l in _bytes_to_chr(alphabet_data): | ||||
|                     if d == 0: | ||||
|                         alphabet.append(l) | ||||
|                         d = e = (e + 1) % 4 | ||||
|                     else: | ||||
|                         d -= 1 | ||||
|                 url = '' | ||||
|                 f = 0 | ||||
|                 e = 3 | ||||
|                 b = 1 | ||||
|                 for letter in _bytes_to_chr(url_data): | ||||
|                     if f == 0: | ||||
|                         l = int(letter) * 10 | ||||
|                         f = 1 | ||||
|                     else: | ||||
|                         if e == 0: | ||||
|                             l += int(letter) | ||||
|                             url += alphabet[l] | ||||
|                             e = (b + 3) % 4 | ||||
|                             f = 0 | ||||
|                             b += 1 | ||||
|                         else: | ||||
|                             e -= 1 | ||||
| 
 | ||||
|                 yield quality.decode(), url | ||||
|             encrypted_data.read(4)  # CRC | ||||
| 
 | ||||
|     def _extract_png_formats(self, video_id): | ||||
|         png = self._download_webpage( | ||||
|             'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id), | ||||
|             video_id, 'Downloading url information', query={'q': 'v2'}) | ||||
|         q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) | ||||
|         formats = [] | ||||
|         for quality, video_url in self._decrypt_url(png): | ||||
|             ext = determine_ext(video_url) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     video_url, video_id, 'mp4', 'm3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             elif ext == 'mpd': | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     video_url, video_id, 'dash', fatal=False)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'format_id': quality, | ||||
|                     'quality': q(quality), | ||||
|                     'url': video_url, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         info = self._download_json( | ||||
|             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, | ||||
|             video_id)['page']['items'][0] | ||||
|         if info['state'] == 'DESPU': | ||||
|             raise ExtractorError('The video is no longer available', expected=True) | ||||
|         title = info['title'] | ||||
|         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) | ||||
|         png_request = sanitized_Request(png_url) | ||||
|         png_request.add_header('Referer', url) | ||||
|         png = self._download_webpage(png_request, video_id, 'Downloading url information') | ||||
|         video_url = _decrypt_url(png) | ||||
|         ext = determine_ext(video_url) | ||||
| 
 | ||||
|         formats = [] | ||||
|         if not video_url.endswith('.f4m') and ext != 'm3u8': | ||||
|             if '?' not in video_url: | ||||
|                 video_url = video_url.replace('resources/', 'auth/resources/') | ||||
|             video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') | ||||
| 
 | ||||
|         if ext == 'm3u8': | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 video_url, video_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|         elif ext == 'f4m': | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 video_url, video_id, f4m_id='hds', fatal=False)) | ||||
|         else: | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|         title = info['title'].strip() | ||||
|         formats = self._extract_png_formats(video_id) | ||||
| 
 | ||||
|         subtitles = None | ||||
|         if info.get('sbtFile') is not None: | ||||
|             subtitles = self.extract_subtitles(video_id, info['sbtFile']) | ||||
|         sbt_file = info.get('sbtFile') | ||||
|         if sbt_file: | ||||
|             subtitles = self.extract_subtitles(video_id, sbt_file) | ||||
| 
 | ||||
|         is_live = info.get('live') is True | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'title': self._live_title(title) if is_live else title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': info.get('image'), | ||||
|             'page_url': url, | ||||
|             'subtitles': subtitles, | ||||
|             'duration': float_or_none(info.get('duration'), scale=1000), | ||||
|             'duration': float_or_none(info.get('duration'), 1000), | ||||
|             'is_live': is_live, | ||||
|             'series': info.get('programTitle'), | ||||
|         } | ||||
| 
 | ||||
|     def _get_subtitles(self, video_id, sub_file): | ||||
| @ -174,48 +179,26 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
|             for s in subs) | ||||
| 
 | ||||
| 
 | ||||
| class RTVEInfantilIE(InfoExtractor): | ||||
| class RTVEInfantilIE(RTVEALaCartaIE): | ||||
|     IE_NAME = 'rtve.es:infantil' | ||||
|     IE_DESC = 'RTVE infantil' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', | ||||
|         'md5': '915319587b33720b8e0357caaa6617e6', | ||||
|         'md5': '5747454717aedf9f9fdf212d1bcfc48d', | ||||
|         'info_dict': { | ||||
|             'id': '3040283', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Maneras de vivir', | ||||
|             'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG', | ||||
|             'thumbnail': r're:https?://.+/1426182947956\.JPG', | ||||
|             'duration': 357.958, | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         info = self._download_json( | ||||
|             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, | ||||
|             video_id)['page']['items'][0] | ||||
| 
 | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         vidplayer_id = self._search_regex( | ||||
|             r' id="vidplayer([0-9]+)"', webpage, 'internal video ID') | ||||
| 
 | ||||
|         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id | ||||
|         png = self._download_webpage(png_url, video_id, 'Downloading url information') | ||||
|         video_url = _decrypt_url(png) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'ext': 'mp4', | ||||
|             'title': info['title'], | ||||
|             'url': video_url, | ||||
|             'thumbnail': info.get('image'), | ||||
|             'duration': float_or_none(info.get('duration'), scale=1000), | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class RTVELiveIE(InfoExtractor): | ||||
| class RTVELiveIE(RTVEALaCartaIE): | ||||
|     IE_NAME = 'rtve.es:live' | ||||
|     IE_DESC = 'RTVE.es live streams' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' | ||||
| @ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': 'la-1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$', | ||||
|             'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'live stream', | ||||
| @ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor): | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         start_time = time.gmtime() | ||||
|         video_id = mobj.group('id') | ||||
| 
 | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es') | ||||
|         title = remove_start(title, 'Estoy viendo ') | ||||
|         title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time) | ||||
| 
 | ||||
|         vidplayer_id = self._search_regex( | ||||
|             (r'playerId=player([0-9]+)', | ||||
|              r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', | ||||
|              r'data-id=["\'](\d+)'), | ||||
|             webpage, 'internal video ID') | ||||
|         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id | ||||
|         png = self._download_webpage(png_url, video_id, 'Downloading url information') | ||||
|         m3u8_url = _decrypt_url(png) | ||||
|         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'title': self._live_title(title), | ||||
|             'formats': self._extract_png_formats(vidplayer_id), | ||||
|             'is_live': True, | ||||
|         } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Remita Amine
						Remita Amine