mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 09:43:32 +00:00 
			
		
		
		
	[spiegel] fix info extraction(#16538)
This commit is contained in:
		
							parent
							
								
									cfd7f2a636
								
							
						
					
					
						commit
						a07879d6b2
					
				| @ -11,9 +11,9 @@ from .nexx import ( | ||||
| from .spiegeltv import SpiegeltvIE | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     unified_strdate, | ||||
|     get_element_by_attribute, | ||||
|     parse_duration, | ||||
|     strip_or_none, | ||||
|     unified_timestamp, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| @ -21,35 +21,38 @@ class SpiegelIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', | ||||
|         'md5': '2c2754212136f35fb4b19767d242f66e', | ||||
|         'md5': 'b57399839d055fccfeb9a0455c439868', | ||||
|         'info_dict': { | ||||
|             'id': '1259285', | ||||
|             'id': '563747', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', | ||||
|             'description': 'md5:8029d8310232196eb235d27575a8b9f4', | ||||
|             'duration': 49, | ||||
|             'upload_date': '20130311', | ||||
|             'timestamp': 1362994320, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', | ||||
|         'md5': 'f2cdf638d7aa47654e251e1aee360af1', | ||||
|         'md5': '5b6c2f4add9d62912ed5fc78a1faed80', | ||||
|         'info_dict': { | ||||
|             'id': '1309159', | ||||
|             'id': '580988', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', | ||||
|             'description': 'md5:c2322b65e58f385a820c10fa03b2d088', | ||||
|             'duration': 983, | ||||
|             'upload_date': '20131115', | ||||
|             'timestamp': 1384546642, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html', | ||||
|         'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51', | ||||
|         'md5': '97b91083a672d72976faa8433430afb9', | ||||
|         'info_dict': { | ||||
|             'id': '1519126', | ||||
|             'id': '601883', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.', | ||||
|             'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"', | ||||
|             'upload_date': '20140904', | ||||
|             'timestamp': 1409834160, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html', | ||||
| @ -62,59 +65,28 @@ class SpiegelIE(InfoExtractor): | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage, handle = self._download_webpage_handle(url, video_id) | ||||
|         metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id | ||||
|         handle = self._request_webpage(metadata_url, video_id) | ||||
| 
 | ||||
|         # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html | ||||
|         if SpiegeltvIE.suitable(handle.geturl()): | ||||
|             return self.url_result(handle.geturl(), 'Spiegeltv') | ||||
| 
 | ||||
|         nexx_id = self._search_regex( | ||||
|             r'nexxOmniaId\s*:\s*(\d+)', webpage, 'nexx id', default=None) | ||||
|         if nexx_id: | ||||
|             domain_id = NexxIE._extract_domain_id(webpage) or '748' | ||||
|             return self.url_result( | ||||
|                 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(), | ||||
|                 video_id=nexx_id) | ||||
| 
 | ||||
|         video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default='')) | ||||
| 
 | ||||
|         title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage) | ||||
|         description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description') | ||||
| 
 | ||||
|         base_url = self._search_regex( | ||||
|             [r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'], | ||||
|             webpage, 'server URL', group='url') | ||||
| 
 | ||||
|         xml_url = base_url + video_id + '.xml' | ||||
|         idoc = self._download_xml(xml_url, video_id) | ||||
| 
 | ||||
|         formats = [] | ||||
|         for n in list(idoc): | ||||
|             if n.tag.startswith('type') and n.tag != 'type6': | ||||
|                 format_id = n.tag.rpartition('type')[2] | ||||
|                 video_url = base_url + n.find('./filename').text | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': video_url, | ||||
|                     'width': int(n.find('./width').text), | ||||
|                     'height': int(n.find('./height').text), | ||||
|                     'abr': int(n.find('./audiobitrate').text), | ||||
|                     'vbr': int(n.find('./videobitrate').text), | ||||
|                     'vcodec': n.find('./codec').text, | ||||
|                     'acodec': 'MP4A', | ||||
|                 }) | ||||
|         duration = float(idoc[0].findall('./duration')[0].text) | ||||
| 
 | ||||
|         self._check_formats(formats, video_id) | ||||
|         self._sort_formats(formats) | ||||
|         video_data = self._parse_json(self._webpage_read_content( | ||||
|             handle, metadata_url, video_id), video_id) | ||||
|         title = video_data['title'] | ||||
|         nexx_id = video_data['nexxOmniaId'] | ||||
|         domain_id = video_data.get('nexxOmniaDomain') or '748' | ||||
| 
 | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': 'nexx:%s:%s' % (domain_id, nexx_id), | ||||
|             'title': title, | ||||
|             'description': description.strip() if description else None, | ||||
|             'duration': duration, | ||||
|             'upload_date': unified_strdate(video_data.get('data-video-date')), | ||||
|             'formats': formats, | ||||
|             'description': strip_or_none(video_data.get('teaser')), | ||||
|             'duration': parse_duration(video_data.get('duration')), | ||||
|             'timestamp': unified_timestamp(video_data.get('datum')), | ||||
|             'ie_key': NexxIE.ie_key(), | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Remita Amine
						Remita Amine