mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-11-04 03:33:46 +00:00 
			
		
		
		
	[InfoExtractor] Handle unquoted values in OpenGraph searches
This commit is contained in:
		
							parent
							
								
									6f8c2635a5
								
							
						
					
					
						commit
						42b098dd79
					
				@ -62,6 +62,7 @@ class TestInfoExtractor(unittest.TestCase):
 | 
			
		||||
            <meta name="og:test1" content='foo > < bar'/>
 | 
			
		||||
            <meta name="og:test2" content="foo >//< bar"/>
 | 
			
		||||
            <meta property=og-test3 content='Ill-formatted opengraph'/>
 | 
			
		||||
            <meta property=og:test4 content=unquoted-value/>
 | 
			
		||||
            '''
 | 
			
		||||
        self.assertEqual(ie._og_search_title(html), 'Foo')
 | 
			
		||||
        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
 | 
			
		||||
@ -74,6 +75,7 @@ class TestInfoExtractor(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
 | 
			
		||||
        self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
 | 
			
		||||
        self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
 | 
			
		||||
        self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
 | 
			
		||||
 | 
			
		||||
    def test_html_search_meta(self):
 | 
			
		||||
        ie = self.ie
 | 
			
		||||
 | 
			
		||||
@ -1087,7 +1087,7 @@ class InfoExtractor(object):
 | 
			
		||||
    # Helper functions for extracting OpenGraph info
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _og_regexes(prop):
 | 
			
		||||
        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
 | 
			
		||||
        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
 | 
			
		||||
        property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
 | 
			
		||||
                       % {'prop': re.escape(prop)})
 | 
			
		||||
        template = r'<meta[^>]+?%s[^>]+?%s'
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user