1
0
mirror of https://github.com/ytdl-org/youtube-dl synced 2024-12-16 14:36:47 +00:00

Compare commits

...

8 Commits

Author SHA1 Message Date
Bart Broere
6f271423e8 Update tests 2024-10-20 14:38:03 +02:00
Bart Broere
75266ce4ed Fix old metadata reference 2024-10-20 13:41:53 +02:00
Bart Broere
817e2e5938 Fix some missing imports 2024-10-20 12:37:48 +02:00
Bart Broere
0e1a0cfa03 Apply some more PR feedback 2024-10-20 12:28:10 +02:00
Bart Broere
7f1c09bea1 Use _sort_formats util 2024-10-20 12:11:30 +02:00
Bart Broere
c3026dd70c Apply suggestion from PR 2024-10-20 12:08:50 +02:00
Bart Broere
41157b2b49 Move GraphQL query into separate variable 2024-10-20 12:00:44 +02:00
Bart Broere
c748eca829 Automatically obtain NextJS buildId and change item to video-item 2024-10-20 11:49:09 +02:00

View File

@ -5,7 +5,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, join_nonempty, traverse_obj from ..utils import ExtractorError, int_or_none, join_nonempty, merge_dicts, traverse_obj, url_or_none, T
class NPOIE(InfoExtractor): class NPOIE(InfoExtractor):
@ -17,23 +17,23 @@ class NPOIE(InfoExtractor):
'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/', 'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
'md5': 'f9ce9c43cc8bc3b8138df1562b99c379', 'md5': 'f9ce9c43cc8bc3b8138df1562b99c379',
'info_dict': { 'info_dict': {
'description': 'Wie is de mol? (2)', 'title': 'Wie is de mol? (2)',
'duration': 2439,
'ext': 'm4v',
'id': 'wie-is-de-mol-2',
'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg', 'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg',
'title': 'Wie is de mol? (2)' 'duration': 2439,
'id': 'wie-is-de-mol-2',
'description': 'wie-is-de-mol-2',
'ext': 'mp4',
} }
}, { }, {
'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika', 'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
'md5': 'c84d054219c4888ed53b4ee3d01b2d93', 'md5': 'c84d054219c4888ed53b4ee3d01b2d93',
'info_dict': { 'info_dict': {
'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
'title': 'Zwart geld: de toekomst komt uit Afrika', 'title': 'Zwart geld: de toekomst komt uit Afrika',
'ext': 'mp4',
'description': 'Zwart geld: de toekomst komt uit Afrika',
'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg', 'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg',
'duration': 3000 'duration': 3000,
'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
'description': 'zwart-geld-de-toekomst-komt-uit-afrika',
'ext': 'mp4',
}, },
}] }]
@ -50,30 +50,22 @@ class NPOIE(InfoExtractor):
program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail', program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
slug, query={'slug': slug}) slug, query={'slug': slug})
product_id = program_metadata.get('productId') product_id = traverse_obj(program_metadata, 'productId')
images = program_metadata.get('images')
thumbnail = None
for image in images:
thumbnail = image.get('url')
break
title = program_metadata.get('title')
descriptions = program_metadata.get('description', {})
description = descriptions.get('long') or descriptions.get('short') or descriptions.get('brief')
duration = program_metadata.get('durationInSeconds')
if not product_id: if not product_id:
raise ExtractorError('No productId found for slug: %s' % slug) raise ExtractorError('No productId found for slug: %s' % (slug,))
formats = self._extract_formats_by_product_id(product_id, slug, url) formats = self._extract_formats_by_product_id(product_id, slug, url)
self._sort_formats(formats)
return { return merge_dicts(traverse_obj(program_metadata, {
'title': 'title',
'description': (('description', ('long', 'short', 'brief')), 'title'),
'thumbnail': ('images', Ellipsis, 'url', T(url_or_none)),
'duration': ('durationInSeconds', T(int_or_none)),
}, get_all=False), {
'id': slug, 'id': slug,
'formats': formats, 'formats': formats,
'title': title or slug, 'title': slug,
'description': description or title or slug, 'description': slug,
'thumbnail': thumbnail, })
'duration': duration,
}
def _extract_formats_by_product_id(self, product_id, slug, url=None): def _extract_formats_by_product_id(self, product_id, slug, url=None):
token = self._get_token(product_id) token = self._get_token(product_id)
@ -118,6 +110,70 @@ class BNNVaraIE(NPOIE):
def _real_extract(self, url): def _real_extract(self, url):
url = url.rstrip('/') url = url.rstrip('/')
video_id = url.split('/')[-1] video_id = url.split('/')[-1]
graphql_query = """query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {
player(
id: $id
mediaUrl: $mediaUrl
hasAdConsent: $hasAdConsent
atInternetId: $atInternetId
) {
... on PlayerSucces {
brand {
name
slug
broadcastsEnabled
__typename
}
title
programTitle
pomsProductId
broadcasters {
name
__typename
}
duration
classifications {
title
imageUrl
type
__typename
}
image {
title
url
__typename
}
cta {
title
url
__typename
}
genres {
name
__typename
}
subtitles {
url
language
__typename
}
sources {
name
url
ratio
__typename
}
type
token
__typename
}
... on PlayerError {
error
__typename
}
__typename
}
}"""
media = self._download_json('https://api.bnnvara.nl/bff/graphql', media = self._download_json('https://api.bnnvara.nl/bff/graphql',
video_id, video_id,
@ -129,14 +185,15 @@ class BNNVaraIE(NPOIE):
'hasAdConsent': False, 'hasAdConsent': False,
'atInternetId': 70 'atInternetId': 70
}, },
'query': 'query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {\n player(\n id: $id\n mediaUrl: $mediaUrl\n hasAdConsent: $hasAdConsent\n atInternetId: $atInternetId\n ) {\n ... on PlayerSucces {\n brand {\n name\n slug\n broadcastsEnabled\n __typename\n }\n title\n programTitle\n pomsProductId\n broadcasters {\n name\n __typename\n }\n duration\n classifications {\n title\n imageUrl\n type\n __typename\n }\n image {\n title\n url\n __typename\n }\n cta {\n title\n url\n __typename\n }\n genres {\n name\n __typename\n }\n subtitles {\n url\n language\n __typename\n }\n sources {\n name\n url\n ratio\n __typename\n }\n type\n token\n __typename\n }\n ... on PlayerError {\n error\n __typename\n }\n __typename\n }\n}' 'query': graphql_query
}).encode('utf8'), }).encode('utf8'),
headers={ headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}) })
product_id = media.get('data', {}).get('player', {}).get('pomsProductId')
formats = self._extract_formats_by_product_id(product_id, video_id) product_id = traverse_obj(media, ('data', 'player', 'pomsProductId'))
formats = self._extract_formats_by_product_id(product_id, video_id) if product_id else []
self._sort_formats(formats)
return { return {
'id': product_id, 'id': product_id,
@ -154,7 +211,9 @@ class ONIE(NPOIE):
'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/', 'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
'md5': 'a85ebd50fa86fe5cbce654655f7dbb12', 'md5': 'a85ebd50fa86fe5cbce654655f7dbb12',
'info_dict': { 'info_dict': {
'id': 'heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel',
'title': 'heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel',
'ext': 'mp4',
} }
}] }]
@ -166,9 +225,7 @@ class ONIE(NPOIE):
for result in results: for result in results:
formats.extend(self._extract_formats_by_product_id(result, video_id)) formats.extend(self._extract_formats_by_product_id(result, video_id))
if not formats: self._sort_formats(formats)
raise ExtractorError('Could not find a POMS product id in the provided URL, '
'perhaps because all stream URLs are DRM protected.')
return { return {
'id': video_id, 'id': video_id,
@ -188,6 +245,7 @@ class ZAPPIE(NPOIE):
'info_dict': { 'info_dict': {
'id': 'POMS_AT_811523', 'id': 'POMS_AT_811523',
'title': 'POMS_AT_811523', 'title': 'POMS_AT_811523',
'ext': 'mp4',
}, },
}] }]
@ -213,21 +271,23 @@ class SchoolTVIE(NPOIE):
'md5': 'e9ef151c4886994e2bea23593348cb14', 'md5': 'e9ef151c4886994e2bea23593348cb14',
'info_dict': { 'info_dict': {
'id': 'zapp-music-challenge-2015-zapp-music-challenge-2015', 'id': 'zapp-music-challenge-2015-zapp-music-challenge-2015',
'title': 'Zapp Music Challenge 2015 - Alain Clark & Yaell', 'title': 'Zapp Music Challenge 2015-Alain Clark & Yaell',
'description': "Een nummer schrijven met de super bekende soulzanger en producer Alain Clark? Dat is de uitdaging voor de dertienjarige Yaell uit Delft. En als het dan echt goed is, mag hij het ook nog eens live gaan spelen op de speelplaats bij Giel Beelen! Muziek is heel erg belangrijk in het leven van Yaell. 'Als er geen muziek zou zijn, dan zou ik heel veel niet kunnen.' Hij is dan ook altijd aan het schrijven, vaak over zijn eigen leven. Maar soms is het best lastig om die teksten te verzinnen. Vindt hij de inspiratie om een hit te maken met Alain?" 'description': "Een nummer schrijven met de super bekende soulzanger en producer Alain Clark? Dat is de uitdaging voor de dertienjarige Yaell uit Delft. En als het dan echt goed is, mag hij het ook nog eens live gaan spelen op de speelplaats bij Giel Beelen! Muziek is heel erg belangrijk in het leven van Yaell. 'Als er geen muziek zou zijn, dan zou ik heel veel niet kunnen.' Hij is dan ook altijd aan het schrijven, vaak over zijn eigen leven. Maar soms is het best lastig om die teksten te verzinnen. Vindt hij de inspiratie om een hit te maken met Alain?",
'ext': 'mp4',
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = url.rstrip('/').split('/')[-1] video_id = url.rstrip('/').split('/')[-1]
# TODO Find out how we could obtain this automatically build_id = self._search_nextjs_data(
# Otherwise this extractor might break each time SchoolTV deploys a new release self._download_webpage(url, video_id),
build_id = 'b7eHUzAVO7wHXCopYxQhV' video_id,
)['buildId']
metadata_url = 'https://schooltv.nl/_next/data/' \ metadata_url = 'https://schooltv.nl/_next/data/' \
+ build_id \ + build_id \
+ '/item/' \ + '/video-item/' \
+ video_id + '.json' + video_id + '.json'
metadata = self._download_json(metadata_url, metadata = self._download_json(metadata_url,
@ -235,9 +295,7 @@ class SchoolTVIE(NPOIE):
formats = self._extract_formats_by_product_id(metadata.get('poms_mid'), video_id) formats = self._extract_formats_by_product_id(metadata.get('poms_mid'), video_id)
if not formats: self._sort_formats(formats)
raise ExtractorError('Could not find a POMS product id in the provided URL, '
'perhaps because all stream URLs are DRM protected.')
return { return {
'id': video_id, 'id': video_id,
@ -258,9 +316,7 @@ class NTRSubsiteIE(NPOIE):
formats.extend(self._extract_formats_by_product_id(result, video_id)) formats.extend(self._extract_formats_by_product_id(result, video_id))
break break
if not formats: self._sort_formats(formats)
raise ExtractorError('Could not find a POMS product id in the provided URL, '
'perhaps because all stream URLs are DRM protected.')
return { return {
'id': video_id, 'id': video_id,
@ -279,6 +335,7 @@ class HetKlokhuisIE(NTRSubsiteIE):
'info_dict': { 'info_dict': {
'id': 'aliens', 'id': 'aliens',
'title': 'aliens', 'title': 'aliens',
'ext': 'mp4',
}, },
}] }]
@ -293,7 +350,7 @@ class VPROIE(NPOIE):
'info_dict': { 'info_dict': {
'id': 'offline-als-luxe.html', 'id': 'offline-als-luxe.html',
'title': 'offline-als-luxe.html', 'title': 'offline-als-luxe.html',
'ext': 'm4v', 'ext': 'mp4',
}, },
}] }]
@ -304,11 +361,9 @@ class VPROIE(NPOIE):
formats = [] formats = []
for result in results: for result in results:
formats.extend(self._extract_formats_by_product_id(result, video_id)) formats.extend(self._extract_formats_by_product_id(result, video_id))
break # TODO find a better solution, VPRO pages can have multiple videos embedded break
if not formats: self._sort_formats(formats)
raise ExtractorError('Could not find a POMS product id in the provided URL, '
'perhaps because all stream URLs are DRM protected.')
return { return {
'id': video_id, 'id': video_id,
@ -327,5 +382,6 @@ class AndereTijdenIE(NTRSubsiteIE):
'info_dict': { 'info_dict': {
'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem', 'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem', 'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
'ext': 'mp4',
}, },
}] }]