mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 17:53:11 +00:00 
			
		
		
		
	[amara] improve extraction
This commit is contained in:
		
							parent
							
								
									cf1a8668e8
								
							
						
					
					
						commit
						2cf8003638
					
				| @ -1,12 +1,20 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from .vimeo import VimeoIE | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     update_url_query, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class AmaraIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|     _TESTS = [{ | ||||
|         # Youtube | ||||
|         'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video', | ||||
|         'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae', | ||||
|         'info_dict': { | ||||
| @ -18,10 +26,11 @@ class AmaraIE(InfoExtractor): | ||||
|             'subtitles': dict, | ||||
|             'upload_date': '20160813', | ||||
|             'uploader': 'PBS NewsHour', | ||||
|                 'uploader_id': 'PBSNewsHour' | ||||
|             'uploader_id': 'PBSNewsHour', | ||||
|             'timestamp': 1549639570, | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|     }, { | ||||
|         # Vimeo | ||||
|         'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', | ||||
|         'md5': '99392c75fa05d432a8f11df03612195e', | ||||
|         'info_dict': { | ||||
| @ -31,46 +40,64 @@ class AmaraIE(InfoExtractor): | ||||
|             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'subtitles': dict, | ||||
|                 'timestamp': 1294649110, | ||||
|                 'upload_date': '20110110', | ||||
|             'timestamp': 1294763658, | ||||
|             'upload_date': '20110111', | ||||
|             'uploader': 'Sam Morrill', | ||||
|             'uploader_id': 'sammorrill' | ||||
|         } | ||||
|         }, | ||||
|         { | ||||
|     }, { | ||||
|         # Direct Link | ||||
|         'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', | ||||
|         'md5': 'd3970f08512738ee60c5807311ff5d3f', | ||||
|         'info_dict': { | ||||
|                 'id': 'ChimamandaAdichie_2009G-transcript', | ||||
|             'id': 's8KL7I3jLmh6', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The danger of a single story', | ||||
|             'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'subtitles': dict, | ||||
|                 'upload_date': '20131206' | ||||
|             'upload_date': '20091007', | ||||
|             'timestamp': 1254942511, | ||||
|         } | ||||
|         } | ||||
|     ] | ||||
| 
 | ||||
|     def get_subtitles_for_language(self, language): | ||||
|         return [{ | ||||
|             'ext': type, | ||||
|             'url': language['subtitles_uri'].replace('format=json', 'format=' + type) | ||||
|         } for type in ['vtt', 'srt', 'json']] | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         meta = self._download_json('https://amara.org/api/videos/%s/' % video_id, video_id, query={'format': 'json'}) | ||||
|         meta = self._download_json( | ||||
|             'https://amara.org/api/videos/%s/' % video_id, | ||||
|             video_id, query={'format': 'json'}) | ||||
|         title = meta['title'] | ||||
|         video_url = meta['all_urls'][0] | ||||
| 
 | ||||
|         video_url = meta.get('all_urls')[0] | ||||
|         subtitles = dict([(language['code'], self.get_subtitles_for_language(language)) for language in meta.get('languages', []) if language['published']]) | ||||
|         subtitles = {} | ||||
|         for language in (meta.get('languages') or []): | ||||
|             subtitles_uri = language.get('subtitles_uri') | ||||
|             if not (subtitles_uri and language.get('published')): | ||||
|                 continue | ||||
|             subtitle = subtitles.setdefault(language.get('code') or 'en', []) | ||||
|             for f in ('json', 'srt', 'vtt'): | ||||
|                 subtitle.append({ | ||||
|                     'ext': f, | ||||
|                     'url': update_url_query(subtitles_uri, {'format': f}), | ||||
|                 }) | ||||
| 
 | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|         info = { | ||||
|             'url': video_url, | ||||
|             'id': video_id, | ||||
|             'subtitles': subtitles, | ||||
|             'title': meta['title'], | ||||
|             'title': title, | ||||
|             'description': meta.get('description'), | ||||
|             'thumbnail': meta.get('thumbnail') | ||||
|             'thumbnail': meta.get('thumbnail'), | ||||
|             'duration': int_or_none(meta.get('duration')), | ||||
|             'timestamp': parse_iso8601(meta.get('created')), | ||||
|         } | ||||
| 
 | ||||
|         for ie in (YoutubeIE, VimeoIE): | ||||
|             if ie.suitable(video_url): | ||||
|                 info.update({ | ||||
|                     '_type': 'url_transparent', | ||||
|                     'ie_key': ie.ie_key(), | ||||
|                 }) | ||||
|                 break | ||||
| 
 | ||||
|         return info | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Remita Amine
						Remita Amine