mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 09:43:32 +00:00 
			
		
		
		
	[xiami] Improve extraction (Closes #9079)
* Switch to JSON source * Add abstract IE for playlists * Extract more track related metadata
This commit is contained in:
		
							parent
							
								
									89c0dc9a5f
								
							
						
					
					
						commit
						4e0c0c1508
					
				| @ -942,7 +942,7 @@ from .xhamster import ( | ||||
|     XHamsterEmbedIE, | ||||
| ) | ||||
| from .xiami import ( | ||||
|     XiamiIE, | ||||
|     XiamiSongIE, | ||||
|     XiamiAlbumIE, | ||||
|     XiamiArtistIE, | ||||
|     XiamiCollectionIE | ||||
|  | ||||
| @ -1,50 +1,42 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
|     int_or_none, | ||||
|     ExtractorError | ||||
| ) | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import int_or_none | ||||
| 
 | ||||
| 
 | ||||
| class XiamiBaseIE(InfoExtractor): | ||||
|     _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' | ||||
| 
 | ||||
|     _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id' | ||||
|     _NS_MAP = {'xm': 'http://xspf.org/ns/0/'} | ||||
|     def _extract_track(self, track, track_id=None): | ||||
|         title = track['title'] | ||||
|         track_url = self._decrypt(track['location']) | ||||
| 
 | ||||
|     def _extract_track(self, track): | ||||
|         artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='') | ||||
|         artist = artist.split(';') | ||||
|         subtitles = {} | ||||
|         lyrics_url = track.get('lyric_url') or track.get('lyric') | ||||
|         if lyrics_url and lyrics_url.startswith('http'): | ||||
|             subtitles['origin'] = [{'url': lyrics_url}] | ||||
| 
 | ||||
|         ret = { | ||||
|             'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)), | ||||
|             'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)), | ||||
|             'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)), | ||||
|             'artist': ';'.join(artist) if artist else None, | ||||
|             'creator': artist[0] if artist else None, | ||||
|             'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))), | ||||
|             'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None), | ||||
|             'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))), | ||||
|         return { | ||||
|             'id': track.get('song_id') or track_id, | ||||
|             'url': track_url, | ||||
|             'title': title, | ||||
|             'thumbnail': track.get('pic') or track.get('album_pic'), | ||||
|             'duration': int_or_none(track.get('length')), | ||||
|             'creator': track.get('artist', '').split(';')[0], | ||||
|             'track': title, | ||||
|             'album': track.get('album_name'), | ||||
|             'artist': track.get('artist'), | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
| 
 | ||||
|         lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP)) | ||||
|         if lyrics_url and lyrics_url.endswith('.lrc'): | ||||
|             ret['description'] = self._download_webpage(lyrics_url, ret['id']) | ||||
|         return ret | ||||
| 
 | ||||
|     def _extract_xml(self, _id, typ=''): | ||||
|         playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id) | ||||
|         tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP)) | ||||
| 
 | ||||
|         if not len(tracklist): | ||||
|             raise ExtractorError('No track found') | ||||
|         return [self._extract_track(track) for track in tracklist] | ||||
|     def _extract_tracks(self, item_id, typ=None): | ||||
|         playlist = self._download_json( | ||||
|             '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id) | ||||
|         return [ | ||||
|             self._extract_track(track, item_id) | ||||
|             for track in playlist['data']['trackList']] | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _decrypt(origin): | ||||
| @ -62,75 +54,87 @@ class XiamiBaseIE(InfoExtractor): | ||||
|         ans = '' | ||||
|         for i in range(0, short_lenth + 1): | ||||
|             for j in range(0, n): | ||||
|                 if len(l[j])>i: | ||||
|                 if len(l[j]) > i: | ||||
|                     ans += l[j][i] | ||||
|         return compat_urllib_parse_unquote(ans).replace('^', '0') | ||||
| 
 | ||||
| 
 | ||||
| class XiamiIE(XiamiBaseIE): | ||||
| class XiamiSongIE(XiamiBaseIE): | ||||
|     IE_NAME = 'xiami:song' | ||||
|     IE_DESC = '虾米音乐' | ||||
|     _VALID_URL = r'http://www\.xiami\.com/song/(?P<id>[0-9]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.xiami.com/song/1775610518', | ||||
|             'md5': '521dd6bea40fd5c9c69f913c232cb57e', | ||||
|             'info_dict': { | ||||
|                 'id': '1775610518', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Woman', | ||||
|                 'creator': 'HONNE', | ||||
|                 'album': 'Woman', | ||||
|                 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', | ||||
|                 'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b', | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.xiami.com/song/1775256504', | ||||
|             'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', | ||||
|             'info_dict': { | ||||
|                 'id': '1775256504', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': '悟空', | ||||
|                 'creator': '戴荃', | ||||
|                 'album': '悟空', | ||||
|                 'description': 'md5:206e67e84f9bed1d473d04196a00b990', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|     _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.xiami.com/song/1775610518', | ||||
|         'md5': '521dd6bea40fd5c9c69f913c232cb57e', | ||||
|         'info_dict': { | ||||
|             'id': '1775610518', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Woman', | ||||
|             'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', | ||||
|             'duration': 265, | ||||
|             'creator': 'HONNE', | ||||
|             'track': 'Woman', | ||||
|             'album': 'Woman', | ||||
|             'artist': 'HONNE', | ||||
|             'subtitles': { | ||||
|                 'origin': [{ | ||||
|                     'ext': 'lrc', | ||||
|                 }], | ||||
|             }, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.xiami.com/song/1775256504', | ||||
|         'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', | ||||
|         'info_dict': { | ||||
|             'id': '1775256504', | ||||
|             'ext': 'mp3', | ||||
|             'title': '悟空', | ||||
|             'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', | ||||
|             'duration': 200, | ||||
|             'creator': '戴荃', | ||||
|             'track': '悟空', | ||||
|             'album': '悟空', | ||||
|             'artist': '戴荃', | ||||
|             'subtitles': { | ||||
|                 'origin': [{ | ||||
|                     'ext': 'lrc', | ||||
|                 }], | ||||
|             }, | ||||
|         } | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         _id = self._match_id(url) | ||||
|         return self._extract_xml(_id)[0] | ||||
|         return self._extract_tracks(self._match_id(url))[0] | ||||
| 
 | ||||
| 
 | ||||
| class XiamiAlbumIE(XiamiBaseIE): | ||||
| class XiamiPlaylistBaseIE(XiamiBaseIE): | ||||
|     def _real_extract(self, url): | ||||
|         item_id = self._match_id(url) | ||||
|         return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id) | ||||
| 
 | ||||
| 
 | ||||
| class XiamiAlbumIE(XiamiPlaylistBaseIE): | ||||
|     IE_NAME = 'xiami:album' | ||||
|     IE_DESC = '虾米音乐 - 专辑' | ||||
|     _VALID_URL = r'http://www\.xiami\.com/album/(?P<id>[0-9]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.xiami.com/album/2100300444', | ||||
|             'info_dict': { | ||||
|                 'id': '2100300444', | ||||
|             }, | ||||
|             'playlist_count': 10, | ||||
|     _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)' | ||||
|     _TYPE = '1' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.xiami.com/album/2100300444', | ||||
|         'info_dict': { | ||||
|             'id': '2100300444', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         _id = self._match_id(url) | ||||
|         return self.playlist_result(self._extract_xml(_id, '/type/1'), _id) | ||||
|         'playlist_count': 10, | ||||
|     }, { | ||||
|         'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
| 
 | ||||
| class XiamiArtistIE(XiamiBaseIE): | ||||
| class XiamiArtistIE(XiamiPlaylistBaseIE): | ||||
|     IE_NAME = 'xiami:artist' | ||||
|     IE_DESC = '虾米音乐 - 歌手' | ||||
|     _VALID_URL = r'http://www\.xiami\.com/artist/(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)' | ||||
|     _TYPE = '2' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', | ||||
|         'info_dict': { | ||||
| @ -139,23 +143,16 @@ class XiamiArtistIE(XiamiBaseIE): | ||||
|         'playlist_count': 20, | ||||
|     } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         _id = self._match_id(url) | ||||
|         return self.playlist_result(self._extract_xml(_id, '/type/2'), _id) | ||||
| 
 | ||||
| 
 | ||||
| class XiamiCollectionIE(XiamiBaseIE): | ||||
| class XiamiCollectionIE(XiamiPlaylistBaseIE): | ||||
|     IE_NAME = 'xiami:collection' | ||||
|     IE_DESC = '虾米音乐 - 精选集' | ||||
|     _VALID_URL = r'http://www\.xiami\.com/collect/(?P<id>[0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)' | ||||
|     _TYPE = '3' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', | ||||
|         'info_dict': { | ||||
|             'id': '156527391', | ||||
|         }, | ||||
|         'playlist_count': 26, | ||||
|         'playlist_mincount': 29, | ||||
|     } | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         _id = self._match_id(url) | ||||
|         return self.playlist_result(self._extract_xml(_id, '/type/3'), _id) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Sergey M․
						Sergey M․