added functionality for @username URL's by flyinggoatman · Pull Request #1444 · pytube/pytube · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

added functionality for @username URL's #1444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 21 additions & 21 deletions pytube/cipher.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,19 +87,20 @@ def get_signature(self, ciphered_signature: str) -> str:
signature = list(ciphered_signature)

for js_func in self.transform_plan:
name, argument = self.parse_function(js_func) # type: ignore
signature = self.transform_map[name](signature, argument)
logger.debug(
"applied transform function\n"
"output: %s\n"
"js_function: %s\n"
"argument: %d\n"
"function: %s",
"".join(signature),
name,
argument,
self.transform_map[name],
)
if js_func:
name, argument = self.parse_function(js_func) # type: ignore
signature = self.transform_map[name](signature, argument)
logger.debug(
"applied transform function\n"
"output: %s\n"
"js_function: %s\n"
"argument: %d\n"
"function: %s",
"".join(signature),
name,
argument,
self.transform_map[name],
)

return "".join(signature)

Expand Down Expand Up @@ -129,7 +130,6 @@ def parse_function(self, js_func: str) -> Tuple[str, int]:
if parse_match:
fn_name, fn_arg = parse_match.groups()
return fn_name, int(fn_arg)

raise RegexMatchError(
caller="parse_function", pattern="js_func_patterns"
)
Expand Down Expand Up @@ -192,7 +192,8 @@ def get_transform_plan(js: str) -> List[str]:
'DE.kT(a,21)']
"""
name = re.escape(get_initial_function_name(js))
pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name
# pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name
pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;((\w+\.\w+\([\w\"\'\[\]\(\)\.\,\s]*\);)+)(?:.+)}" % name
logger.debug("getting transform plan")
return regex_search(pattern, js, group=1).split(";")

Expand Down Expand Up @@ -269,8 +270,10 @@ def get_throttling_function_name(js: str) -> str:
# a.C && (b = a.get("n")) && (b = Bpa[0](b), a.set("n", b),
# Bpa.length || iha("")) }};
# In the above case, `iha` is the relevant function name
r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*'
r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&.*?\|\|\s*([a-z]+)',
r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])?\([a-z]\)',
r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])\([a-z]\)',

]
logger.debug('Finding throttling function name')
for pattern in function_patterns:
Expand Down Expand Up @@ -685,13 +688,10 @@ def map_functions(js_func: str) -> Callable:
# function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
(r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}", swap),
# function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
(
r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}",
swap,
),
(r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}",swap),
)

for pattern, fn in mapper:
if re.search(pattern, js_func):
return fn
raise RegexMatchError(caller="map_functions", pattern="multiple")
raise RegexMatchError(caller="map_functions", pattern="multiple")
153 changes: 126 additions & 27 deletions pytube/contrib/channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
"""Module for interacting with a user's youtube channel."""
import json
import logging
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, Iterable

from pytube import extract, Playlist, request
from pytube.helpers import uniqueify
from pytube import extract, YouTube, Playlist, request
from pytube.helpers import uniqueify, DeferredGeneratorList

logger = logging.getLogger(__name__)

Expand All @@ -28,6 +28,7 @@ def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
)

self.videos_url = self.channel_url + '/videos'
self.shorts_url = self.channel_url + '/shorts'
self.playlists_url = self.channel_url + '/playlists'
self.community_url = self.channel_url + '/community'
self.featured_channels_url = self.channel_url + '/channels'
Expand All @@ -39,12 +40,18 @@ def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
self._featured_channels_html = None
self._about_html = None

self._html_page = self.videos_url # Videos will be preferred over short videos
self._visitor_data = None

@property
def channel_name(self):
    """Get the name of the YouTube channel.

    The title is read from the ``channelMetadataRenderer`` entry of
    ``self.initial_data``.

    :rtype: str
    :returns: The channel title.
    """
    # No print() here: reading a property must not write to stdout.
    return self.initial_data['metadata']['channelMetadataRenderer']['title']

@property
Expand Down Expand Up @@ -75,7 +82,8 @@ def html(self):
"""
if self._html:
return self._html
self._html = request.get(self.videos_url)
self._html = request.get(self._html_page)
# self._html = request.get(self.shorts_url)
return self._html

@property
Expand Down Expand Up @@ -134,8 +142,42 @@ def about_html(self):
self._about_html = request.get(self.about_url)
return self._about_html

@staticmethod
def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
def _build_continuation_url(self, continuation: str) -> Tuple[str, dict, dict]:
"""Helper method to build the url and headers required to request
the next page of videos
:param str continuation: Continuation extracted from the json response
of the last page
:rtype: Tuple[str, dict, dict]
:returns: Tuple of an url and required headers for the next http
request
"""
return (
(
# was changed to this format (and post requests)
# between 2022.11.06 and 2022.11.20
"https://www.youtube.com/youtubei/v1/browse?key="
f"{self.yt_api_key}"
),
{
"X-YouTube-Client-Name": "1",
"X-YouTube-Client-Version": "2.20200720.00.02",
},
# extra data required for post request
{
"continuation": continuation,
"context": {
"client": {
"clientName": "WEB",
"visitorData": self._visitor_data,
"clientVersion": "2.20200720.00.02"
}
}
}
)

# @staticmethod
# def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
def _extract_videos(self, raw_json: str) -> Tuple[List[str], Optional[str]]:
"""Extracts videos from a raw json page

:param str raw_json: Input json extracted from the page or the last
Expand All @@ -145,15 +187,37 @@ def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
a continuation token, if more videos are available
"""
initial_data = json.loads(raw_json)

# this is the json tree structure, if the json was extracted from
# html
try:
videos = initial_data["contents"][
"twoColumnBrowseResultsRenderer"][
"tabs"][1]["tabRenderer"]["content"][
"sectionListRenderer"]["contents"][0][
"itemSectionRenderer"]["contents"][0][
"gridRenderer"]["items"]
# # videos = initial_data["contents"][
# # "twoColumnBrowseResultsRenderer"][
# # "tabs"][1]["tabRenderer"]["content"][
# # "richGridRenderer"]["contents"]
# videos = initial_data["contents"][
# "twoColumnBrowseResultsRenderer"][
# "tabs"][1]["tabRenderer"]["content"][
# "sectionListRenderer"]["contents"][0][
# "itemSectionRenderer"]["contents"][0][
# "gridRenderer"]["items"]
try:
# This is the json tree structure for videos
videos = initial_data["contents"][
"twoColumnBrowseResultsRenderer"][
"tabs"][1]["tabRenderer"]["content"]["richGridRenderer"]["contents"]

except(KeyError, IndexError, TypeError):
# This is the json tree structure for short videos
videos = initial_data["contents"][
"twoColumnBrowseResultsRenderer"][
"tabs"][2]["tabRenderer"]["content"]["richGridRenderer"]["contents"]

# This is the json tree structure of visitor data
# It is necessary to send the visitorData together with the continuation token
self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
"ytConfigData"]["visitorData"]

except (KeyError, IndexError, TypeError):
try:
# this is the json tree structure, if the json was directly sent
Expand Down Expand Up @@ -183,19 +247,54 @@ def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
# if there is an error, no continuation is available
continuation = None

# only extract the video ids from the video data
videos_url = []
try:
# Extract id from videos
for x in videos:
videos_url.append(f"/watch?v="
f"{x['richItemRenderer']['content']['videoRenderer']['videoId']}")
except (KeyError, IndexError, TypeError):
# Extract id from short videos
for x in videos:
# videos_url.append(f"/watch?v="
# f"{x['richItemRenderer']['content']['reelItemRenderer']['videoId']}")
videos_url.append(f"/watch?v="
f"{x['richItemRenderer']['content']['shortsLockupViewModel']['entityId'][-11:]}")

# remove duplicates
return (
uniqueify(
list(
# only extract the video ids from the video data
map(
lambda x: (
f"/watch?v="
f"{x['gridVideoRenderer']['videoId']}"
),
videos
)
),
),
continuation,
)
# return (
# uniqueify(
# list(
# # only extract the video ids from the video data
# map(
# lambda x: (
# f"/watch?v="
# f"{x['richItemRenderer']['content']['videoRenderer']['videoId']}"
# ),
# videos
# )
# ),
# ),
# continuation,
# )

return uniqueify(videos_url), continuation

@property
def videos(self) -> Iterable[YouTube]:
    """Yields YouTube objects of videos in this channel

    :rtype: Iterable[YouTube]
    :returns: Lazily evaluated list of YouTube objects
    """
    if self._html_page != self.videos_url:
        # ``html`` returns the cached page once it is set, so a page that
        # was downloaded for another tab must be dropped before switching.
        # NOTE(review): other caches derived from the html may live in the
        # parent class and are not visible here - confirm.
        self._html = None
    self._html_page = self.videos_url  # Set video tab
    return DeferredGeneratorList(self.videos_generator())

@property
def shorts(self) -> Iterable[YouTube]:
    """Yields YouTube objects of short videos in this channel

    :rtype: Iterable[YouTube]
    :returns: Lazily evaluated list of YouTube objects
    """
    if self._html_page != self.shorts_url:
        # ``html`` returns the cached page once it is set, so a page that
        # was downloaded for another tab must be dropped before switching.
        # NOTE(review): other caches derived from the html may live in the
        # parent class and are not visible here - confirm.
        self._html = None
    self._html_page = self.shorts_url  # Set shorts tab
    return DeferredGeneratorList(self.videos_generator())
8 changes: 4 additions & 4 deletions pytube/contrib/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,10 @@ def fetch_and_parse(self, continuation=None):
continue

if 'videoRenderer' not in video_details:
logger.warning('Unexpected renderer encountered.')
logger.warning(f'Renderer name: {video_details.keys()}')
logger.warning(f'Search term: {self.query}')
logger.warning(
logger.warn('Unexpected renderer encountered.')
logger.warn(f'Renderer name: {video_details.keys()}')
logger.warn(f'Search term: {self.query}')
logger.warn(
'Please open an issue at '
'https://github.com/pytube/pytube/issues '
'and provide this log output.'
Expand Down
1 change: 1 addition & 0 deletions pytube/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def video_id(url: str) -> str:
- :samp:`https://youtube.com/embed/{video_id}`
- :samp:`https://youtu.be/{video_id}`


:param str url:
A YouTube url containing a video id.
:rtype: str
Expand Down
Loading
0