mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 17:53:11 +00:00 
			
		
		
		
	[amara] improve extraction
This commit is contained in:
		
							parent
							
								
									cf1a8668e8
								
							
						
					
					
						commit
						2cf8003638
					
				| @ -1,76 +1,103 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from .youtube import YoutubeIE | ||||||
|  | from .vimeo import VimeoIE | ||||||
|  | from ..utils import ( | ||||||
|  |     int_or_none, | ||||||
|  |     parse_iso8601, | ||||||
|  |     update_url_query, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class AmaraIE(InfoExtractor): | class AmaraIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)' |     _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)' | ||||||
|     _TESTS = [ |     _TESTS = [{ | ||||||
|         { |         # Youtube | ||||||
|             'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video', |         'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video', | ||||||
|             'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae', |         'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae', | ||||||
|             'info_dict': { |         'info_dict': { | ||||||
|                 'id': 'h6ZuVdvYnfE', |             'id': 'h6ZuVdvYnfE', | ||||||
|                 'ext': 'mp4', |             'ext': 'mp4', | ||||||
|                 'title': 'Why jury trials are becoming less common', |             'title': 'Why jury trials are becoming less common', | ||||||
|                 'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1', |             'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1', | ||||||
|                 'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|                 'subtitles': dict, |             'subtitles': dict, | ||||||
|                 'upload_date': '20160813', |             'upload_date': '20160813', | ||||||
|                 'uploader': 'PBS NewsHour', |             'uploader': 'PBS NewsHour', | ||||||
|                 'uploader_id': 'PBSNewsHour' |             'uploader_id': 'PBSNewsHour', | ||||||
|             } |             'timestamp': 1549639570, | ||||||
|         }, |  | ||||||
|         { |  | ||||||
|             'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', |  | ||||||
|             'md5': '99392c75fa05d432a8f11df03612195e', |  | ||||||
|             'info_dict': { |  | ||||||
|                 'id': '18622084', |  | ||||||
|                 'ext': 'mov', |  | ||||||
|                 'title': 'Vimeo at CES 2011!', |  | ||||||
|                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', |  | ||||||
|                 'thumbnail': r're:^https?://.*\.jpg$', |  | ||||||
|                 'subtitles': dict, |  | ||||||
|                 'timestamp': 1294649110, |  | ||||||
|                 'upload_date': '20110110', |  | ||||||
|                 'uploader': 'Sam Morrill', |  | ||||||
|                 'uploader_id': 'sammorrill' |  | ||||||
|             } |  | ||||||
|         }, |  | ||||||
|         { |  | ||||||
|             'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', |  | ||||||
|             'md5': 'd3970f08512738ee60c5807311ff5d3f', |  | ||||||
|             'info_dict': { |  | ||||||
|                 'id': 'ChimamandaAdichie_2009G-transcript', |  | ||||||
|                 'ext': 'mp4', |  | ||||||
|                 'title': 'The danger of a single story', |  | ||||||
|                 'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23', |  | ||||||
|                 'thumbnail': r're:^https?://.*\.jpg$', |  | ||||||
|                 'subtitles': dict, |  | ||||||
|                 'upload_date': '20131206' |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|     ] |     }, { | ||||||
| 
 |         # Vimeo | ||||||
|     def get_subtitles_for_language(self, language): |         'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', | ||||||
|         return [{ |         'md5': '99392c75fa05d432a8f11df03612195e', | ||||||
|             'ext': type, |         'info_dict': { | ||||||
|             'url': language['subtitles_uri'].replace('format=json', 'format=' + type) |             'id': '18622084', | ||||||
|         } for type in ['vtt', 'srt', 'json']] |             'ext': 'mov', | ||||||
|  |             'title': 'Vimeo at CES 2011!', | ||||||
|  |             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|  |             'subtitles': dict, | ||||||
|  |             'timestamp': 1294763658, | ||||||
|  |             'upload_date': '20110111', | ||||||
|  |             'uploader': 'Sam Morrill', | ||||||
|  |             'uploader_id': 'sammorrill' | ||||||
|  |         } | ||||||
|  |     }, { | ||||||
|  |         # Direct Link | ||||||
|  |         'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', | ||||||
|  |         'md5': 'd3970f08512738ee60c5807311ff5d3f', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': 's8KL7I3jLmh6', | ||||||
|  |             'ext': 'mp4', | ||||||
|  |             'title': 'The danger of a single story', | ||||||
|  |             'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23', | ||||||
|  |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|  |             'subtitles': dict, | ||||||
|  |             'upload_date': '20091007', | ||||||
|  |             'timestamp': 1254942511, | ||||||
|  |         } | ||||||
|  |     }] | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|         meta = self._download_json('https://amara.org/api/videos/%s/' % video_id, video_id, query={'format': 'json'}) |         meta = self._download_json( | ||||||
|  |             'https://amara.org/api/videos/%s/' % video_id, | ||||||
|  |             video_id, query={'format': 'json'}) | ||||||
|  |         title = meta['title'] | ||||||
|  |         video_url = meta['all_urls'][0] | ||||||
| 
 | 
 | ||||||
|         video_url = meta.get('all_urls')[0] |         subtitles = {} | ||||||
|         subtitles = dict([(language['code'], self.get_subtitles_for_language(language)) for language in meta.get('languages', []) if language['published']]) |         for language in (meta.get('languages') or []): | ||||||
|  |             subtitles_uri = language.get('subtitles_uri') | ||||||
|  |             if not (subtitles_uri and language.get('published')): | ||||||
|  |                 continue | ||||||
|  |             subtitle = subtitles.setdefault(language.get('code') or 'en', []) | ||||||
|  |             for f in ('json', 'srt', 'vtt'): | ||||||
|  |                 subtitle.append({ | ||||||
|  |                     'ext': f, | ||||||
|  |                     'url': update_url_query(subtitles_uri, {'format': f}), | ||||||
|  |                 }) | ||||||
| 
 | 
 | ||||||
|         return { |         info = { | ||||||
|             '_type': 'url_transparent', |  | ||||||
|             'url': video_url, |             'url': video_url, | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'subtitles': subtitles, |             'subtitles': subtitles, | ||||||
|             'title': meta['title'], |             'title': title, | ||||||
|             'description': meta.get('description'), |             'description': meta.get('description'), | ||||||
|             'thumbnail': meta.get('thumbnail') |             'thumbnail': meta.get('thumbnail'), | ||||||
|  |             'duration': int_or_none(meta.get('duration')), | ||||||
|  |             'timestamp': parse_iso8601(meta.get('created')), | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         for ie in (YoutubeIE, VimeoIE): | ||||||
|  |             if ie.suitable(video_url): | ||||||
|  |                 info.update({ | ||||||
|  |                     '_type': 'url_transparent', | ||||||
|  |                     'ie_key': ie.ie_key(), | ||||||
|  |                 }) | ||||||
|  |                 break | ||||||
|  | 
 | ||||||
|  |         return info | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Remita Amine
						Remita Amine