mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 17:53:11 +00:00 
			
		
		
		
	[ORF] Re-factor and update ORFFM4StoryIE
				
					
				
			* fix getting media via DASH instead of inaccessible mp4
			* also get in-page YT media
This commit is contained in:
		
							parent
							
								
									e39466051f
								
							
						
					
					
						commit
						e20ca543f0
					
				| @ -6,6 +6,7 @@ import functools | ||||
| import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
| @ -14,10 +15,8 @@ from ..utils import ( | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     mimetype2ext, | ||||
|     orderedSet, | ||||
|     parse_age_limit, | ||||
|     parse_iso8601, | ||||
|     remove_end, | ||||
|     strip_jsonp, | ||||
|     txt_or_none, | ||||
|     unified_strdate, | ||||
| @ -305,11 +304,90 @@ class ORFPodcastIE(ORFRadioBase): | ||||
|         }, self._extract_podcast_upload(data), rev=True) | ||||
| 
 | ||||
| 
 | ||||
| class ORFIPTVIE(InfoExtractor): | ||||
| class ORFIPTVBase(InfoExtractor): | ||||
|     _TITLE_STRIP_RE = '' | ||||
| 
 | ||||
|     def _extract_video(self, video_id, webpage, fatal=False): | ||||
| 
 | ||||
|         data = self._download_json( | ||||
|             'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, | ||||
|             video_id)[0] | ||||
| 
 | ||||
|         video = traverse_obj(data, ( | ||||
|             'sources', ('default', 'q8c'), | ||||
|             T(lambda x: x if x['loadBalancerUrl'] else None), | ||||
|             any)) | ||||
| 
 | ||||
|         load_balancer_url = video['loadBalancerUrl'] | ||||
| 
 | ||||
|         try: | ||||
|             rendition = self._download_json( | ||||
|                 load_balancer_url, video_id, transform_source=strip_jsonp) | ||||
|         except ExtractorError: | ||||
|             rendition = None | ||||
| 
 | ||||
|         if not rendition: | ||||
|             rendition = { | ||||
|                 'redirect': { | ||||
|                     'smil': re.sub( | ||||
|                         r'(/)jsonp(/.+\.)mp4$', r'\1dash\2smil/manifest.mpd', | ||||
|                         load_balancer_url), | ||||
|                 }, | ||||
|             } | ||||
| 
 | ||||
|         f = traverse_obj(video, { | ||||
|             'abr': ('audioBitrate', T(int_or_none)), | ||||
|             'vbr': ('bitrate', T(int_or_none)), | ||||
|             'fps': ('videoFps', T(int_or_none)), | ||||
|             'width': ('videoWidth', T(int_or_none)), | ||||
|             'height': ('videoHeight', T(int_or_none)), | ||||
|         }) | ||||
| 
 | ||||
|         formats = [] | ||||
|         for format_id, format_url in traverse_obj(rendition, ( | ||||
|                 'redirect', T(dict.items), Ellipsis)): | ||||
|             if format_id == 'rtmp': | ||||
|                 ff = f.copy() | ||||
|                 ff.update({ | ||||
|                     'url': format_url, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|                 formats.append(ff) | ||||
|             elif determine_ext(format_url) == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     format_url, video_id, f4m_id=format_id)) | ||||
|             elif determine_ext(format_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     format_url, video_id, 'mp4', m3u8_id=format_id, | ||||
|                     entry_protocol='m3u8_native')) | ||||
|             elif determine_ext(format_url) == 'mpd': | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     format_url, video_id, mpd_id=format_id)) | ||||
| 
 | ||||
|         if formats or fatal: | ||||
|             self._sort_formats(formats) | ||||
|         else: | ||||
|             return | ||||
| 
 | ||||
|         return merge_dicts({ | ||||
|             'id': video_id, | ||||
|             'title': re.sub(self._TITLE_STRIP_RE, '', self._og_search_title(webpage)), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'upload_date': unified_strdate(self._html_search_meta( | ||||
|                 'dc.date', webpage, 'upload date', fatal=False)), | ||||
|             'formats': formats, | ||||
|         }, traverse_obj(data, { | ||||
|             'duration': ('duration', T(k_float_or_none)), | ||||
|             'thumbnail': ('sources', 'default', 'preview', T(url_or_none)), | ||||
|         }), rev=True) | ||||
| 
 | ||||
| 
 | ||||
| class ORFIPTVIE(ORFIPTVBase): | ||||
|     IE_NAME = 'orf:iptv' | ||||
|     IE_DESC = 'iptv.ORF.at' | ||||
|     _WORKING = False  # URLs redirect to orf.at/ | ||||
|     _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' | ||||
|     _TITLE_STRIP_RE = r'\s+-\s+iptv\.ORF\.at\S*$' | ||||
| 
 | ||||
|     _TEST = { | ||||
|         'url': 'http://iptv.orf.at/stories/2275236/', | ||||
| @ -334,74 +412,32 @@ class ORFIPTVIE(InfoExtractor): | ||||
|         video_id = self._search_regex( | ||||
|             r'data-video(?:id)?="(\d+)"', webpage, 'video id') | ||||
| 
 | ||||
|         data = self._download_json( | ||||
|             'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, | ||||
|             video_id)[0] | ||||
| 
 | ||||
|         duration = float_or_none(data['duration'], 1000) | ||||
| 
 | ||||
|         video = data['sources']['default'] | ||||
|         load_balancer_url = video['loadBalancerUrl'] | ||||
|         abr = int_or_none(video.get('audioBitrate')) | ||||
|         vbr = int_or_none(video.get('bitrate')) | ||||
|         fps = int_or_none(video.get('videoFps')) | ||||
|         width = int_or_none(video.get('videoWidth')) | ||||
|         height = int_or_none(video.get('videoHeight')) | ||||
|         thumbnail = video.get('preview') | ||||
| 
 | ||||
|         rendition = self._download_json( | ||||
|             load_balancer_url, video_id, transform_source=strip_jsonp) | ||||
| 
 | ||||
|         f = { | ||||
|             'abr': abr, | ||||
|             'vbr': vbr, | ||||
|             'fps': fps, | ||||
|             'width': width, | ||||
|             'height': height, | ||||
|         } | ||||
| 
 | ||||
|         formats = [] | ||||
|         for format_id, format_url in rendition['redirect'].items(): | ||||
|             if format_id == 'rtmp': | ||||
|                 ff = f.copy() | ||||
|                 ff.update({ | ||||
|                     'url': format_url, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|                 formats.append(ff) | ||||
|             elif determine_ext(format_url) == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     format_url, video_id, f4m_id=format_id)) | ||||
|             elif determine_ext(format_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     format_url, video_id, 'mp4', m3u8_id=format_id)) | ||||
|             else: | ||||
|                 continue | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
|         title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at') | ||||
|         description = self._og_search_description(webpage) | ||||
|         upload_date = unified_strdate(self._html_search_meta( | ||||
|             'dc.date', webpage, 'upload date')) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         return self._extract_video(video_id, webpage) | ||||
| 
 | ||||
| 
 | ||||
| class ORFFM4StoryIE(InfoExtractor): | ||||
| class ORFFM4StoryIE(ORFIPTVBase): | ||||
|     IE_NAME = 'orf:fm4:story' | ||||
|     IE_DESC = 'fm4.orf.at stories' | ||||
|     _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)' | ||||
|     _TITLE_STRIP_RE = r'\s+-\s+fm4\.ORF\.at\s*$' | ||||
| 
 | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://fm4.orf.at/stories/3041554/', | ||||
|         'add_ie': ['Youtube'], | ||||
|         'info_dict': { | ||||
|             'id': '3041554', | ||||
|             'title': 'Is The EU Green Deal In Mortal Danger?', | ||||
|         }, | ||||
|         'playlist_count': 4, | ||||
|         'params': { | ||||
|             'format': 'bestvideo', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://fm4.orf.at/stories/2865738/', | ||||
|         'info_dict': { | ||||
|             'id': '2865738', | ||||
|             'title': 'Manu Delago und Inner Tongue live', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'md5': 'e1c2c706c45c7b34cf478bbf409907ca', | ||||
|             'info_dict': { | ||||
| @ -418,86 +454,49 @@ class ORFFM4StoryIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': '547798', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Manu Delago und Inner Tongue live (2)', | ||||
|                 'title': 'Manu Delago und Inner Tongue https://vod-ww.mdn.ors.at/cms-worldwide_episodes_nas/_definst_/nas/cms-worldwide_episodes/online/14228823_0005.smil/chunklist_b992000_vo.m3u8live (2)', | ||||
|                 'duration': 1504.08, | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|                 'upload_date': '20170913', | ||||
|                 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.', | ||||
|             }, | ||||
|         }], | ||||
|     } | ||||
|         'skip': 'Videos gone', | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         story_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, story_id) | ||||
| 
 | ||||
|         entries = [] | ||||
|         all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage)) | ||||
|         for idx, video_id in enumerate(all_ids): | ||||
|             data = self._download_json( | ||||
|                 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, | ||||
|                 video_id)[0] | ||||
|         seen_ids = set() | ||||
|         for idx, video_id in enumerate(re.findall(r'data-video(?:id)?="(\d+)"', webpage)): | ||||
|             if video_id in seen_ids: | ||||
|                 continue | ||||
|             seen_ids.add(video_id) | ||||
|             entry = self._extract_video(video_id, webpage, fatal=False) | ||||
|             if not entry: | ||||
|                 continue | ||||
| 
 | ||||
|             duration = float_or_none(data['duration'], 1000) | ||||
| 
 | ||||
|             video = data['sources']['q8c'] | ||||
|             load_balancer_url = video['loadBalancerUrl'] | ||||
|             abr = int_or_none(video.get('audioBitrate')) | ||||
|             vbr = int_or_none(video.get('bitrate')) | ||||
|             fps = int_or_none(video.get('videoFps')) | ||||
|             width = int_or_none(video.get('videoWidth')) | ||||
|             height = int_or_none(video.get('videoHeight')) | ||||
|             thumbnail = video.get('preview') | ||||
| 
 | ||||
|             rendition = self._download_json( | ||||
|                 load_balancer_url, video_id, transform_source=strip_jsonp) | ||||
| 
 | ||||
|             f = { | ||||
|                 'abr': abr, | ||||
|                 'vbr': vbr, | ||||
|                 'fps': fps, | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|             } | ||||
| 
 | ||||
|             formats = [] | ||||
|             for format_id, format_url in rendition['redirect'].items(): | ||||
|                 if format_id == 'rtmp': | ||||
|                     ff = f.copy() | ||||
|                     ff.update({ | ||||
|                         'url': format_url, | ||||
|                         'format_id': format_id, | ||||
|                     }) | ||||
|                     formats.append(ff) | ||||
|                 elif determine_ext(format_url) == 'f4m': | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         format_url, video_id, f4m_id=format_id)) | ||||
|                 elif determine_ext(format_url) == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         format_url, video_id, 'mp4', m3u8_id=format_id)) | ||||
|                 else: | ||||
|                     continue | ||||
|             self._sort_formats(formats) | ||||
| 
 | ||||
|             title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at') | ||||
|             if idx >= 1: | ||||
|                 # Titles are duplicates, make them unique | ||||
|                 title += ' (' + str(idx + 1) + ')' | ||||
|             description = self._og_search_description(webpage) | ||||
|             upload_date = unified_strdate(self._html_search_meta( | ||||
|                 'dc.date', webpage, 'upload date')) | ||||
|                 entry['title'] = '%s (%d)' % (entry['title'], idx) | ||||
| 
 | ||||
|             entries.append({ | ||||
|                 'id': video_id, | ||||
|                 'title': title, | ||||
|                 'description': description, | ||||
|                 'duration': duration, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 'upload_date': upload_date, | ||||
|                 'formats': formats, | ||||
|             }) | ||||
|             entries.append(entry) | ||||
| 
 | ||||
|         return self.playlist_result(entries) | ||||
|         seen_ids = set() | ||||
|         for yt_id in re.findall( | ||||
|                 r'data-id\s*=\s*["\']([\w-]+)[^>]+\bclass\s*=\s*["\']youtube\b', | ||||
|                 webpage): | ||||
|             if yt_id in seen_ids: | ||||
|                 continue | ||||
|             seen_ids.add(yt_id) | ||||
|             if YoutubeIE.suitable(yt_id): | ||||
|                 entries.append(self.url_result(yt_id, ie='Youtube', video_id=yt_id)) | ||||
| 
 | ||||
|         return self.playlist_result( | ||||
|             entries, story_id, | ||||
|             re.sub(self._TITLE_STRIP_RE, '', self._og_search_title(webpage, default='') or None)) | ||||
| 
 | ||||
| 
 | ||||
| class ORFONBase(InfoExtractor): | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 dirkf
						dirkf