@ -20,6 +20,7 @@ from .exceptions import ScraperError
# fmt: off
# fmt: off
YOUTUBE_DURATION_REGEX = re . compile (
YOUTUBE_DURATION_REGEX = re . compile (
" P "
" P "
r " (?:(?P<weeks> \ d+)W)? "
r " (?:(?P<days> \ d+)D)? "
r " (?:(?P<days> \ d+)D)? "
" T "
" T "
r " (?:(?P<hours> \ d+)H)? "
r " (?:(?P<hours> \ d+)H)? "
@ -75,8 +76,8 @@ class YoutubeScraper:
return ScraperResult ( url , ScraperType . YOUTUBE , video_data )
return ScraperResult ( url , ScraperType . YOUTUBE , video_data )
@static method
def get_metadata_from_result ( result : ScraperResult ) - > Dict [ str , Any ] :
@class method
def get_metadata_from_result ( cls , result : ScraperResult ) - > Dict [ str , Any ] :
""" Get the metadata that we ' re interested in out of a scrape result. """
""" Get the metadata that we ' re interested in out of a scrape result. """
if result . scraper_type != ScraperType . YOUTUBE :
if result . scraper_type != ScraperType . YOUTUBE :
raise ValueError ( " Can ' t process a result from a different scraper. " )
raise ValueError ( " Can ' t process a result from a different scraper. " )
@ -101,7 +102,17 @@ class YoutubeScraper:
content_details = result . data . get ( " contentDetails " )
content_details = result . data . get ( " contentDetails " )
if content_details . get ( " duration " ) :
if content_details . get ( " duration " ) :
match = YOUTUBE_DURATION_REGEX . match ( content_details [ " duration " ] )
try :
metadata [ " duration " ] = cls . parse_duration ( content_details [ " duration " ] )
except ValueError :
pass
return metadata
@classmethod
def parse_duration ( cls , duration : str ) - > int :
""" Convert a YouTube duration (subset of ISO8601 duration) to seconds. """
match = YOUTUBE_DURATION_REGEX . match ( duration )
if not match :
if not match :
raise ValueError ( " Unable to parse duration " )
raise ValueError ( " Unable to parse duration " )
@ -115,12 +126,11 @@ class YoutubeScraper:
duration_components [ key ] = int ( value )
duration_components [ key ] = int ( value )
delta = timedelta (
delta = timedelta (
weeks = duration_components [ " weeks " ] ,
days = duration_components [ " days " ] ,
days = duration_components [ " days " ] ,
hours = duration_components [ " hours " ] ,
hours = duration_components [ " hours " ] ,
minutes = duration_components [ " minutes " ] ,
minutes = duration_components [ " minutes " ] ,
seconds = duration_components [ " seconds " ] ,
seconds = duration_components [ " seconds " ] ,
)
)
metadata [ " duration " ] = int ( delta . total_seconds ( ) )
return metadata
return int ( delta . total_seconds ( ) )