mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-11-04 03:33:46 +00:00 
			
		
		
		
	[youtube] Add fallback metadata extraction from videoDetails (closes #18052)
This commit is contained in:
		
							parent
							
								
									38c32dbf19
								
							
						
					
					
						commit
						dbdaaa231a
					
				@ -41,6 +41,7 @@ from ..utils import (
 | 
			
		||||
    remove_quotes,
 | 
			
		||||
    remove_start,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    str_or_none,
 | 
			
		||||
    str_to_int,
 | 
			
		||||
    try_get,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
@ -501,6 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                'categories': ['Science & Technology'],
 | 
			
		||||
                'tags': ['youtube-dl'],
 | 
			
		||||
                'duration': 10,
 | 
			
		||||
                'view_count': int,
 | 
			
		||||
                'like_count': int,
 | 
			
		||||
                'dislike_count': int,
 | 
			
		||||
                'start_time': 1,
 | 
			
		||||
@ -583,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                'categories': ['Science & Technology'],
 | 
			
		||||
                'tags': ['youtube-dl'],
 | 
			
		||||
                'duration': 10,
 | 
			
		||||
                'view_count': int,
 | 
			
		||||
                'like_count': int,
 | 
			
		||||
                'dislike_count': int,
 | 
			
		||||
            },
 | 
			
		||||
@ -1538,6 +1541,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        def extract_view_count(v_info):
            """Pull the view count out of a parsed video-info mapping.

            Looks up the first element of ``v_info['view_count']`` through
            ``try_get`` and hands the result to ``int_or_none``.

            NOTE(review): presumably ``try_get`` yields None when the key or
            index is missing, making the result None in that case -- confirm
            against the helper's definition in ..utils.
            """
            first_count = try_get(v_info, lambda info: info['view_count'][0])
            return int_or_none(first_count)
 | 
			
		||||
 | 
			
		||||
        player_response = {}
 | 
			
		||||
 | 
			
		||||
        # Get video info
 | 
			
		||||
        embed_webpage = None
 | 
			
		||||
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
 | 
			
		||||
@ -1580,6 +1585,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                if args.get('livestream') == '1' or args.get('live_playback') == 1:
 | 
			
		||||
                    is_live = True
 | 
			
		||||
                sts = ytplayer_config.get('sts')
 | 
			
		||||
                if not player_response:
 | 
			
		||||
                    pl_response = str_or_none(args.get('player_response'))
 | 
			
		||||
                    if pl_response:
 | 
			
		||||
                        pl_response = self._parse_json(pl_response, video_id, fatal=False)
 | 
			
		||||
                        if isinstance(pl_response, dict):
 | 
			
		||||
                            player_response = pl_response
 | 
			
		||||
            if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
 | 
			
		||||
                # We also try looking in get_video_info since it may contain different dashmpd
 | 
			
		||||
                # URL that points to a DASH manifest with possibly different itag set (some itags
 | 
			
		||||
@ -1608,6 +1619,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                    if not video_info_webpage:
 | 
			
		||||
                        continue
 | 
			
		||||
                    get_video_info = compat_parse_qs(video_info_webpage)
 | 
			
		||||
                    if not player_response:
 | 
			
		||||
                        pl_response = get_video_info.get('player_response', [None])[0]
 | 
			
		||||
                        if isinstance(pl_response, dict):
 | 
			
		||||
                            player_response = pl_response
 | 
			
		||||
                    add_dash_mpd(get_video_info)
 | 
			
		||||
                    if view_count is None:
 | 
			
		||||
                        view_count = extract_view_count(get_video_info)
 | 
			
		||||
@ -1653,9 +1668,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                    '"token" parameter not in video info for unknown reason',
 | 
			
		||||
                    video_id=video_id)
 | 
			
		||||
 | 
			
		||||
        video_details = try_get(
 | 
			
		||||
            player_response, lambda x: x['videoDetails'], dict) or {}
 | 
			
		||||
 | 
			
		||||
        # title
 | 
			
		||||
        if 'title' in video_info:
 | 
			
		||||
            video_title = video_info['title'][0]
 | 
			
		||||
        elif 'title' in player_response:
 | 
			
		||||
            video_title = video_details['title']
 | 
			
		||||
        else:
 | 
			
		||||
            self._downloader.report_warning('Unable to extract video title')
 | 
			
		||||
            video_title = '_'
 | 
			
		||||
@ -1718,6 +1738,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
 | 
			
		||||
        if view_count is None:
 | 
			
		||||
            view_count = extract_view_count(video_info)
 | 
			
		||||
        if view_count is None and video_details:
 | 
			
		||||
            view_count = int_or_none(video_details.get('viewCount'))
 | 
			
		||||
 | 
			
		||||
        # Check for "rental" videos
 | 
			
		||||
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
 | 
			
		||||
@ -1898,7 +1920,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 | 
			
		||||
 | 
			
		||||
        # uploader
 | 
			
		||||
        video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
 | 
			
		||||
        video_uploader = try_get(
 | 
			
		||||
            video_info, lambda x: x['author'][0],
 | 
			
		||||
            compat_str) or str_or_none(video_details.get('author'))
 | 
			
		||||
        if video_uploader:
 | 
			
		||||
            video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
 | 
			
		||||
        else:
 | 
			
		||||
@ -2011,12 +2035,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        like_count = _extract_count('like')
 | 
			
		||||
        dislike_count = _extract_count('dislike')
 | 
			
		||||
 | 
			
		||||
        if view_count is None:
 | 
			
		||||
            view_count = str_to_int(self._search_regex(
 | 
			
		||||
                r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
 | 
			
		||||
                'view count', default=None))
 | 
			
		||||
 | 
			
		||||
        # subtitles
 | 
			
		||||
        video_subtitles = self.extract_subtitles(video_id, video_webpage)
 | 
			
		||||
        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
 | 
			
		||||
 | 
			
		||||
        video_duration = try_get(
 | 
			
		||||
            video_info, lambda x: int_or_none(x['length_seconds'][0]))
 | 
			
		||||
        if not video_duration:
 | 
			
		||||
            video_duration = int_or_none(video_details.get('lengthSeconds'))
 | 
			
		||||
        if not video_duration:
 | 
			
		||||
            video_duration = parse_duration(self._html_search_meta(
 | 
			
		||||
                'duration', video_webpage, 'video duration'))
 | 
			
		||||
@ -2244,6 +2275,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
 | 
			
		||||
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
 | 
			
		||||
            'categories': ['People & Blogs'],
 | 
			
		||||
            'tags': list,
 | 
			
		||||
            'view_count': int,
 | 
			
		||||
            'like_count': int,
 | 
			
		||||
            'dislike_count': int,
 | 
			
		||||
        },
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user