From 13b0e81f17c802320302c5f0490d8bfc44a0faee Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 20 Oct 2024 13:22:25 +0100 Subject: [PATCH] [utils] Correctly match class names in `get_element[s]_by_class()` * reproduce CSS .classname behaviour ("bar" matches "bar", "foo bar baz", etc) * add tests --- test/test_utils.py | 10 ++++++++-- youtube_dl/utils.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 2947cce7e..16523a0c9 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1603,11 +1603,14 @@ Line 1 def test_get_element_by_class(self): html = ''' - nice + nice ''' self.assertEqual(get_element_by_class('foo', html), 'nice') + self.assertEqual(get_element_by_class('bar', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) + self.assertEqual(get_element_by_class('baz', html), None) + self.assertEqual(get_element_by_class('bam', html), None) def test_get_element_by_attribute(self): html = ''' @@ -1626,10 +1629,13 @@ Line 1 def test_get_elements_by_class(self): html = ''' - nicealso nice + nicealso nice ''' self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice']) + self.assertEqual(get_elements_by_class('bar', html), ['nice', 'also nice']) + self.assertEqual(get_elements_by_class('baz', html), []) + self.assertEqual(get_elements_by_class('bam', html), []) self.assertEqual(get_elements_by_class('no-such-class', html), []) def test_get_elements_by_attribute(self): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ac1e78002..a8a26ee02 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1960,7 +1960,7 @@ def get_element_by_attribute(attribute, value, html, escape_value=True): def get_elements_by_class(class_name, html): """Return the content of all tags with the specified class in the passed HTML document as a list""" return get_elements_by_attribute( - 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), + 'class', r'[^\'"]*(?