mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-31 09:43:32 +00:00 
			
		
		
		
	[extractor/common] Fix inline HTML5 media tags processing and add test (closes #27345)
This commit is contained in:
		
							parent
							
								
									e2bdf8bf4f
								
							
						
					
					
						commit
						5a1fbbf8b7
					
				| @ -108,6 +108,18 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) | ||||
| 
 | ||||
|     def test_parse_html5_media_entries(self): | ||||
|         # inline video tag | ||||
|         expect_dict( | ||||
|             self, | ||||
|             self.ie._parse_html5_media_entries( | ||||
|                 'https://127.0.0.1/video.html', | ||||
|                 r'<html><video src="/vid.mp4" /></html>', None)[0], | ||||
|             { | ||||
|                 'formats': [{ | ||||
|                     'url': 'https://127.0.0.1/vid.mp4', | ||||
|                 }], | ||||
|             }) | ||||
| 
 | ||||
|         # from https://www.r18.com/ | ||||
|         # with kbps in label | ||||
|         expect_dict( | ||||
|  | ||||
| @ -2515,9 +2515,9 @@ class InfoExtractor(object): | ||||
|         # https://www.ampproject.org/docs/reference/components/amp-video) | ||||
|         # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/ | ||||
|         _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)' | ||||
|         media_tags = [(media_tag, media_type, '') | ||||
|                       for media_tag, media_type | ||||
|                       in re.findall(r'(?s)(<%s[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)] | ||||
|         media_tags = [(media_tag, media_tag_name, media_type, '') | ||||
|                       for media_tag, media_tag_name, media_type | ||||
|                       in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)] | ||||
|         media_tags.extend(re.findall( | ||||
|             # We only allow video|audio followed by a whitespace or '>'. | ||||
|             # Allowing more characters may end up in significant slow down (see | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Sergey M․
						Sergey M․