pytube · flyinggoatman · Apr 23, 2025 · Apr 23, 2025
diff --git a/pytube/cipher.py b/pytube/cipher.py
@@ -87,19 +87,20 @@ def get_signature(self, ciphered_signature: str) -> str:
         signature = list(ciphered_signature)
 
         for js_func in self.transform_plan:
-            name, argument = self.parse_function(js_func)  # type: ignore
-            signature = self.transform_map[name](signature, argument)
-            logger.debug(
-                "applied transform function\n"
-                "output: %s\n"
-                "js_function: %s\n"
-                "argument: %d\n"
-                "function: %s",
-                "".join(signature),
-                name,
-                argument,
-                self.transform_map[name],
-            )
+            if js_func:
+                name, argument = self.parse_function(js_func)  # type: ignore
+                signature = self.transform_map[name](signature, argument)
+                logger.debug(
+                    "applied transform function\n"
+                    "output: %s\n"
+                    "js_function: %s\n"
+                    "argument: %d\n"
+                    "function: %s",
+                    "".join(signature),
+                    name,
+                    argument,
+                    self.transform_map[name],
+                )
 
         return "".join(signature)
 
@@ -129,7 +130,6 @@ def parse_function(self, js_func: str) -> Tuple[str, int]:
             if parse_match:
                 fn_name, fn_arg = parse_match.groups()
                 return fn_name, int(fn_arg)
-
         raise RegexMatchError(
             caller="parse_function", pattern="js_func_patterns"
         )
@@ -192,7 +192,8 @@ def get_transform_plan(js: str) -> List[str]:
     'DE.kT(a,21)']
     """
     name = re.escape(get_initial_function_name(js))
-    pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name
+    # pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name
+    pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;((\w+\.\w+\([\w\"\'\[\]\(\)\.\,\s]*\);)+)(?:.+)}" % name
     logger.debug("getting transform plan")
     return regex_search(pattern, js, group=1).split(";")
 
@@ -269,8 +270,10 @@ def get_throttling_function_name(js: str) -> str:
         # a.C && (b = a.get("n")) && (b = Bpa[0](b), a.set("n", b),
         # Bpa.length || iha("")) }};
         # In the above case, `iha` is the relevant function name
-        r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*'
+        r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&.*?\|\|\s*([a-z]+)',
         r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])?\([a-z]\)',
+        r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])\([a-z]\)',
+
     ]
     logger.debug('Finding throttling function name')
     for pattern in function_patterns:
@@ -685,13 +688,10 @@ def map_functions(js_func: str) -> Callable:
         # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
         (r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}", swap),
         # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
-        (
-            r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}",
-            swap,
-        ),
+        (r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}",swap),
     )
 
     for pattern, fn in mapper:
         if re.search(pattern, js_func):
             return fn
-    raise RegexMatchError(caller="map_functions", pattern="multiple")
+    raise RegexMatchError(caller="map_functions", pattern="multiple")
diff --git a/pytube/contrib/channel.py b/pytube/contrib/channel.py
@@ -2,10 +2,10 @@
 """Module for interacting with a user's youtube channel."""
 import json
 import logging
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple, Iterable
 
-from pytube import extract, Playlist, request
-from pytube.helpers import uniqueify
+from pytube import extract, YouTube, Playlist, request
+from pytube.helpers import uniqueify, DeferredGeneratorList
 
 logger = logging.getLogger(__name__)
 
@@ -28,6 +28,7 @@ def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
         )
 
         self.videos_url = self.channel_url + '/videos'
+        self.shorts_url = self.channel_url + '/shorts'
         self.playlists_url = self.channel_url + '/playlists'
         self.community_url = self.channel_url + '/community'
         self.featured_channels_url = self.channel_url + '/channels'
@@ -39,12 +40,18 @@ def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
         self._featured_channels_html = None
         self._about_html = None
 
+        self._html_page = self.videos_url  # Videos will be preferred over short videos
+        self._visitor_data = None
+
     @property
     def channel_name(self):
         """Get the name of the YouTube channel.
 
         :rtype: str
         """
+        # Report the title through the module logger at debug level rather
+        # than print(), so library users get no unsolicited stdout output.
+        logger.debug("channel name: %s", self.initial_data['metadata']['channelMetadataRenderer']['title'])
         return self.initial_data['metadata']['channelMetadataRenderer']['title']
 
     @property
@@ -75,7 +82,8 @@ def html(self):
         """
         if self._html:
             return self._html
-        self._html = request.get(self.videos_url)
+        self._html = request.get(self._html_page)
+        # self._html = request.get(self.shorts_url)
         return self._html
 
     @property
@@ -134,8 +142,42 @@ def about_html(self):
             self._about_html = request.get(self.about_url)
             return self._about_html
 
-    @staticmethod
-    def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
+    def _build_continuation_url(self, continuation: str) -> Tuple[str, dict, dict]:
+        """Helper method to build the url, headers and post data required to
+        request the next page of videos
+        :param str continuation: Continuation extracted from the json response
+            of the last page
+        :rtype: Tuple[str, dict, dict]
+        :returns: Tuple of the browse url (keyed with ``self.yt_api_key``), the
+            required headers and the extra data sent with the post request
+        """
+        return (
+            (
+                # was changed to this format (and post requests)
+                # between 2022.11.06 and 2022.11.20
+                "https://www.youtube.com/youtubei/v1/browse?key="
+                f"{self.yt_api_key}"
+            ),
+            {
+                "X-YouTube-Client-Name": "1",
+                "X-YouTube-Client-Version": "2.20200720.00.02",
+            },
+            # extra data required for post request
+            {
+                "continuation": continuation,
+                "context": {
+                    "client": {
+                        "clientName": "WEB",
+                        "visitorData": self._visitor_data,
+                        "clientVersion": "2.20200720.00.02"
+                    }
+                }
+            }
+        )
+
+    # @staticmethod
+    # def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
+    def _extract_videos(self, raw_json: str) -> Tuple[List[str], Optional[str]]:
         """Extracts videos from a raw json page
 
         :param str raw_json: Input json extracted from the page or the last
@@ -145,15 +187,37 @@ def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
             a continuation token, if more videos are available
         """
         initial_data = json.loads(raw_json)
+
         # this is the json tree structure, if the json was extracted from
         # html
         try:
-            videos = initial_data["contents"][
-                "twoColumnBrowseResultsRenderer"][
-                "tabs"][1]["tabRenderer"]["content"][
-                "sectionListRenderer"]["contents"][0][
-                "itemSectionRenderer"]["contents"][0][
-                "gridRenderer"]["items"]
+        #     # videos = initial_data["contents"][
+        #     #     "twoColumnBrowseResultsRenderer"][
+        #     #     "tabs"][1]["tabRenderer"]["content"][
+        #     #     "richGridRenderer"]["contents"]
+        #     videos = initial_data["contents"][
+        #         "twoColumnBrowseResultsRenderer"][
+        #         "tabs"][1]["tabRenderer"]["content"][
+        #         "sectionListRenderer"]["contents"][0][
+        #         "itemSectionRenderer"]["contents"][0][
+        #         "gridRenderer"]["items"]
+            try:
+                # This is the json tree structure for videos
+                videos = initial_data["contents"][
+                    "twoColumnBrowseResultsRenderer"][
+                    "tabs"][1]["tabRenderer"]["content"]["richGridRenderer"]["contents"]
+
+            except(KeyError, IndexError, TypeError):
+                # This is the json tree structure for short videos
+                videos = initial_data["contents"][
+                    "twoColumnBrowseResultsRenderer"][
+                    "tabs"][2]["tabRenderer"]["content"]["richGridRenderer"]["contents"]
+
+            # This is the json tree structure of visitor data
+            # It is necessary to send the visitorData together with the continuation token
+            self._visitor_data = initial_data["responseContext"]["webResponseContextExtensionData"][
+                "ytConfigData"]["visitorData"]
+
         except (KeyError, IndexError, TypeError):
             try:
                 # this is the json tree structure, if the json was directly sent
@@ -183,19 +247,54 @@ def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
             # if there is an error, no continuation is available
             continuation = None
 
+        # only extract the video ids from the video data
+        videos_url = []
+        try:
+            # Extract id from videos
+            for x in videos:
+                videos_url.append(f"/watch?v="
+                                  f"{x['richItemRenderer']['content']['videoRenderer']['videoId']}")
+        except (KeyError, IndexError, TypeError):
+            # Extract id from short videos
+            for x in videos:
+                # videos_url.append(f"/watch?v="
+                #                   f"{x['richItemRenderer']['content']['reelItemRenderer']['videoId']}")
+                videos_url.append(f"/watch?v="
+                                  f"{x['richItemRenderer']['content']['shortsLockupViewModel']['entityId'][-11:]}")
+
         # remove duplicates
-        return (
-            uniqueify(
-                list(
-                    # only extract the video ids from the video data
-                    map(
-                        lambda x: (
-                            f"/watch?v="
-                            f"{x['gridVideoRenderer']['videoId']}"
-                        ),
-                        videos
-                    )
-                ),
-            ),
-            continuation,
-        )
+        # return (
+        #     uniqueify(
+        #         list(
+        #             # only extract the video ids from the video data
+        #             map(
+        #                 lambda x: (
+        #                     f"/watch?v="
+        #                     f"{x['richItemRenderer']['content']['videoRenderer']['videoId']}"
+        #                 ),
+        #                 videos
+        #             )
+        #         ),
+        #     ),
+        #     continuation,
+        # )
+
+        return uniqueify(videos_url), continuation
+
+    @property
+    def videos(self) -> Iterable[YouTube]:
+        """Select the videos tab and return its videos.
+
+        :returns: ``DeferredGeneratorList`` wrapping ``self.videos_generator()``
+        """
+        self._html_page = self.videos_url  # tab that ``html`` fetches when nothing is cached
+        return DeferredGeneratorList(self.videos_generator())
+
+    @property
+    def shorts(self) -> Iterable[YouTube]:
+        """Select the shorts tab and return its short videos.
+
+        :returns: ``DeferredGeneratorList`` wrapping ``self.videos_generator()``
+        """
+        self._html_page = self.shorts_url  # tab that ``html`` fetches when nothing is cached
+        return DeferredGeneratorList(self.videos_generator())
diff --git a/pytube/contrib/search.py b/pytube/contrib/search.py
@@ -150,10 +150,10 @@ def fetch_and_parse(self, continuation=None):
                     continue
 
                 if 'videoRenderer' not in video_details:
-                    logger.warning('Unexpected renderer encountered.')
-                    logger.warning(f'Renderer name: {video_details.keys()}')
-                    logger.warning(f'Search term: {self.query}')
-                    logger.warning(
+                    logger.warning('Unexpected renderer encountered.')
+                    logger.warning(f'Renderer name: {video_details.keys()}')
+                    logger.warning(f'Search term: {self.query}')
+                    logger.warning(
                         'Please open an issue at '
                         'https://github.com/pytube/pytube/issues '
                         'and provide this log output.'

diff --git a/pytube/extract.py b/pytube/extract.py
@@ -124,6 +124,7 @@ def video_id(url: str) -> str:
     - :samp:`https://youtube.com/embed/{video_id}`
     - :samp:`https://youtu.be/{video_id}`
 
+
     :param str url:
         A YouTube url containing a video id.
     :rtype: str