mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 17:53:11 +00:00 
			
		
		
		
	[rtbf] improve extraction
- add support for audio and live streams (closes #11923) (closes #9638) - extract HLS, DASH and all HTTP formats - extract subtitles - fix up specific HTTP URLs (fixes #16101)
This commit is contained in:
		
							parent
							
								
									734d461ca0
								
							
						
					
					
						commit
						764cd4e6f3
					
				| @ -1,10 +1,14 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     strip_or_none, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| @ -14,20 +18,19 @@ class RTBFIE(InfoExtractor): | ||||
|         (?: | ||||
|             video/[^?]+\?.*\bid=| | ||||
|             ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| | ||||
|             auvio/[^/]+\?.*id= | ||||
|             auvio/[^/]+\?.*\b(?P<live>l)?id= | ||||
|         )(?P<id>\d+)''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', | ||||
|         'md5': '799f334ddf2c0a582ba80c44655be570', | ||||
|         'md5': '8c876a1cceeb6cf31b476461ade72384', | ||||
|         'info_dict': { | ||||
|             'id': '1921274', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Les Diables au coeur (épisode 2)', | ||||
|             'description': 'Football - Diables Rouges', | ||||
|             'duration': 3099, | ||||
|             'description': '(du 25/04/2014)', | ||||
|             'duration': 3099.54, | ||||
|             'upload_date': '20140425', | ||||
|             'timestamp': 1398456336, | ||||
|             'uploader': 'rtbfsport', | ||||
|             'timestamp': 1398456300, | ||||
|         } | ||||
|     }, { | ||||
|         # geo restricted | ||||
| @ -39,6 +42,18 @@ class RTBFIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # Live | ||||
|         'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # Audio | ||||
|         'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # With Subtitle | ||||
|         'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' | ||||
|     _PROVIDERS = { | ||||
| @ -53,46 +68,94 @@ class RTBFIE(InfoExtractor): | ||||
|     ] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         data = self._download_json( | ||||
|             'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id) | ||||
|         live, media_id = re.match(self._VALID_URL, url).groups() | ||||
|         embed_page = self._download_webpage( | ||||
|             'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), | ||||
|             media_id, query={'id': media_id}) | ||||
|         data = self._parse_json(self._html_search_regex( | ||||
|             r'data-media="([^"]+)"', embed_page, 'media data'), media_id) | ||||
| 
 | ||||
|         error = data.get('error') | ||||
|         if error: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
| 
 | ||||
|         data = data['data'] | ||||
| 
 | ||||
|         provider = data.get('provider') | ||||
|         if provider in self._PROVIDERS: | ||||
|             return self.url_result(data['url'], self._PROVIDERS[provider]) | ||||
| 
 | ||||
|         title = data['title'] | ||||
|         is_live = data.get('isLive') | ||||
|         if is_live: | ||||
|             title = self._live_title(title) | ||||
|         height_re = r'-(\d+)p\.' | ||||
|         formats = [] | ||||
|         for key, format_id in self._QUALITIES: | ||||
|             format_url = data.get(key + 'Url') | ||||
|             if format_url: | ||||
| 
 | ||||
|         m3u8_url = data.get('urlHlsAes128') or data.get('urlHls') | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)) | ||||
| 
 | ||||
|         fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x | ||||
|         http_url = data.get('url') | ||||
|         if formats and http_url and re.search(height_re, http_url): | ||||
|             http_url = fix_url(http_url) | ||||
|             for m3u8_f in formats.copy(): | ||||
|                 height = m3u8_f.get('height') | ||||
|                 if not height: | ||||
|                     continue | ||||
|                 f = m3u8_f.copy() | ||||
|                 del f['protocol'] | ||||
|                 f.update({ | ||||
|                     'format_id': m3u8_f['format_id'].replace('hls-', 'http-'), | ||||
|                     'url': re.sub(height_re, '-%dp.' % height, http_url), | ||||
|                 }) | ||||
|                 formats.append(f) | ||||
|         else: | ||||
|             sources = data.get('sources') or {} | ||||
|             for key, format_id in self._QUALITIES: | ||||
|                 format_url = sources.get(key) | ||||
|                 if not format_url: | ||||
|                     continue | ||||
|                 height = int_or_none(self._search_regex( | ||||
|                     height_re, format_url, 'height', default=None)) | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': format_url, | ||||
|                     'url': fix_url(format_url), | ||||
|                     'height': height, | ||||
|                 }) | ||||
| 
 | ||||
|         thumbnails = [] | ||||
|         for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items(): | ||||
|             if thumbnail_id != 'default': | ||||
|                 thumbnails.append({ | ||||
|                     'url': self._IMAGE_HOST + thumbnail_url, | ||||
|                     'id': thumbnail_id, | ||||
|                 }) | ||||
|         mpd_url = data.get('urlDash') | ||||
|         if not data.get('drm') and mpd_url: | ||||
|             formats.extend(self._extract_mpd_formats( | ||||
|                 mpd_url, media_id, mpd_id='dash', fatal=False)) | ||||
| 
 | ||||
|         audio_url = data.get('urlAudio') | ||||
|         if audio_url: | ||||
|             formats.append({ | ||||
|                 'format_id': 'audio', | ||||
|                 'url': audio_url, | ||||
|                 'vcodec': 'none', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
|         subtitles = {} | ||||
|         for track in (data.get('tracks') or {}).values(): | ||||
|             sub_url = track.get('url') | ||||
|             if not sub_url: | ||||
|                 continue | ||||
|             subtitles.setdefault(track.get('lang') or 'fr', []).append({ | ||||
|                 'url': sub_url, | ||||
|             }) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'id': media_id, | ||||
|             'formats': formats, | ||||
|             'title': data['title'], | ||||
|             'description': data.get('description') or data.get('subtitle'), | ||||
|             'thumbnails': thumbnails, | ||||
|             'duration': data.get('duration') or data.get('realDuration'), | ||||
|             'timestamp': int_or_none(data.get('created')), | ||||
|             'view_count': int_or_none(data.get('viewCount')), | ||||
|             'uploader': data.get('channel'), | ||||
|             'tags': data.get('tags'), | ||||
|             'title': title, | ||||
|             'description': strip_or_none(data.get('description')), | ||||
|             'thumbnail': data.get('thumbnail'), | ||||
|             'duration': float_or_none(data.get('realDuration')), | ||||
|             'timestamp': int_or_none(data.get('liveFrom')), | ||||
|             'series': data.get('programLabel'), | ||||
|             'subtitles': subtitles, | ||||
|             'is_live': is_live, | ||||
|         } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Remita Amine
						Remita Amine