youtube-dl/youtube_dl/extractor/worldstarhiphop.py

import re

from .common import InfoExtractor


class WorldStarHipHopIE(InfoExtractor):
    """Extractor for video pages on worldstarhiphop.com / worldstarcandy.com.

    A page may embed a Vevo video, embed a Youtube video, or reference a
    directly downloadable media file; `_real_extract` handles the three
    cases in that order.
    """
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    _TEST = {
        "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
        "file": "wshh6a7q1ny0G34ZwuIO.mp4",
        "md5": "9d04de741161603bf7071bbf4e883186",
        "info_dict": {
            "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
        }
    }

    def _real_extract(self, url):
        """Extract the video described by `url`.

        Returns either the result of `self.url_result(...)` (when the page
        embeds a Vevo or Youtube video) or a one-element list holding a dict
        with the keys 'id', 'url', 'title', 'thumbnail' and 'ext'.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage_src = self._download_webpage(url, video_id)

        # Vevo embeds are recognised first and delegated to the Vevo extractor.
        vevo_match = re.search(r'videoId=(.*?)&amp?', webpage_src)
        if vevo_match:
            self.to_screen(u'Vevo video detected:')
            return self.url_result('vevo:%s' % vevo_match.group(1), ie='Vevo')

        # No `fatal=False` is passed here, so a missing match is reported by
        # _search_regex itself (NOTE(review): per InfoExtractor's contract it
        # raises rather than returning None - confirm against common.py).
        # A former `if video_url is None:` fallback re-ran the Vevo regex that
        # has already failed on this same page just above, so it could never
        # produce a result and has been removed.
        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
            webpage_src, u'video URL')

        # The "file" variable may point at a Youtube URL instead of a media file.
        if 'youtube' in video_url:
            self.to_screen(u'Youtube video detected:')
            return self.url_result(video_url, ie='Youtube')

        # The container is guessed from the URL text; anything not mentioning
        # 'mp4' is treated as flv.
        ext = 'mp4' if 'mp4' in video_url else 'flv'

        video_title = self._html_search_regex(r"<title>(.*)</title>",
            webpage_src, u'title')

        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
            webpage_src, u'thumbnail', fatal=False)

        if not thumbnail:
            # Pages without a thumbnail take their title from the
            # "candytitles" span when one is present.
            _title = r"""candytitles.*>(.*)</span>"""
            mobj = re.search(_title, webpage_src)
            if mobj is not None:
                video_title = mobj.group(1)

        # The single info dict is wrapped in a list, as before, so callers
        # relying on the list-shaped result keep working.
        results = [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': thumbnail,
            'ext': ext,
        }]
        return results
Move WorldStarHipHop into its own file 2013-06-23 20:04:08 +00:00			`import re`

			`from .common import InfoExtractor`


			`class WorldStarHipHopIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'`
Allow moving tests into IE files Allow adding download tests right in the IE file. This will cut down on merge conflicts and make it more likely that new IE authors will add tests right away. 2013-06-27 16:28:45 +00:00			`_TEST = {`
			`"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",`
			`"file": "wshh6a7q1ny0G34ZwuIO.mp4",`
			`"md5": "9d04de741161603bf7071bbf4e883186",`
			`"info_dict": {`
			`"title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"`
			`}`
			`}`

Move WorldStarHipHop into its own file 2013-06-23 20:04:08 +00:00
			`def _real_extract(self, url):`
			`m = re.match(self._VALID_URL, url)`
			`video_id = m.group('id')`

			`webpage_src = self._download_webpage(url, video_id)`

using re.search 2013-07-29 21:39:14 +00:00			`video_url = re.search(r'videoId=(.*?)&amp?',`
			`webpage_src)`
detect vevo embed fix 2013-07-29 19:24:26 +00:00
detect vevo embed 2013-07-29 19:11:57 +00:00			`if video_url:`
			`self.to_screen(u'Vevo video detected:')`
using re.search 2013-07-29 21:39:14 +00:00			`return self.url_result('vevo:%s' % video_url.group(1), ie='Vevo')`
detect vevo embed 2013-07-29 19:11:57 +00:00
Move WorldStarHipHop into its own file 2013-06-23 20:04:08 +00:00			`video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',`
			`webpage_src, u'video URL')`

using re.search 2013-07-29 21:39:14 +00:00			`if video_url is None:`
detect vevo embed fix 2013-07-29 19:24:26 +00:00			`video_url = self._search_regex(r'videoId=(.*?)&amp?',`
			`webpage_src, u'video URL')`
			`self.to_screen(u'Vevo video detected:')`
			`vevo_id = 'vevo:%s' % video_url`
			`return self.url_result(vevo_id, ie='Vevo')`

added Youtube embed detection to WorldstarIE 2013-06-25 01:58:49 +00:00			`if 'youtube' in video_url:`
			`self.to_screen(u'Youtube video detected:')`
Merge pull request #922 from JohnyMoSwag/master Added embedded youtube detection to WorldstarIE 2013-06-25 20:07:31 +00:00			`return self.url_result(video_url, ie='Youtube')`
added Youtube embed detection to WorldstarIE 2013-06-25 01:58:49 +00:00
Move WorldStarHipHop into its own file 2013-06-23 20:04:08 +00:00			`if 'mp4' in video_url:`
			`ext = 'mp4'`
			`else:`
			`ext = 'flv'`

			`video_title = self._html_search_regex(r"<title>(.*)</title>",`
			`webpage_src, u'title')`

			`# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.`
			`thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',`
			`webpage_src, u'thumbnail', fatal=False)`

			`if not thumbnail:`
			`_title = r"""candytitles.*>(.*)</span>"""`
			`mobj = re.search(_title, webpage_src)`
			`if mobj is not None:`
			`video_title = mobj.group(1)`

			`results = [{`
			`'id': video_id,`
			`'url' : video_url,`
			`'title' : video_title,`
			`'thumbnail' : thumbnail,`
			`'ext' : ext,`
			`}]`
			`return results`