diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index 52c4d5b..442044a 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -29,5 +29,7 @@ jobs: run: poetry install - name: Install dependencies tests run: poetry install --with test + - name: Make repo importable (set PYTHONPATH) + run: echo "PYTHONPATH=$GITHUB_WORKSPACE" >> $GITHUB_ENV - name: Run tests run: poetry run pytest diff --git a/pixivpy3/aapi.py b/pixivpy3/aapi.py index f6ffd24..29fee31 100644 --- a/pixivpy3/aapi.py +++ b/pixivpy3/aapi.py @@ -6,7 +6,6 @@ from __future__ import annotations import datetime as dt -import re import urllib.parse as up from typing import Any, Literal, Union @@ -859,26 +858,99 @@ def webview_novel( raw: bool = False, req_auth: bool = True, ) -> models.WebviewNovel | str: - # change new endpoint due to #337 - url = f"{self.hosts}/webview/v2/novel" - params = { - "id": novel_id, - "viewer_version": "20221031_ai", - } + """Fetch the novel content through the AJAX API (following the approach of the TypeScript code).""" - r = self.no_auth_requests_call("GET", url, params=params, req_auth=req_auth) + # Build the AJAX URL - note that the www host must be used instead of app-api + if "app-api" in self.hosts: + base_url = self.hosts.replace("app-api", "www") + else: + base_url = self.hosts + + url = f"{base_url}/ajax/novel/{novel_id}" + + headers = { + "Accept": "application/json, text/plain, */*", + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", + "Accept-Encoding": "gzip, deflate, br", + "X-Requested-With": "XMLHttpRequest", + "Referer": f"{base_url}/novel/show.php?id={novel_id}", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "DNT": "1", + "Connection": "keep-alive", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + "Cache-Control": "no-cache", + "Pragma": "no-cache", + "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', + "Sec-Ch-Ua-Mobile": "?0", + 
"Sec-Ch-Ua-Platform": '"Windows"', + } + + # Add a random delay to avoid being detected (blocks the caller for 1-3 seconds on every call) + import time + import random + + time.sleep(random.uniform(1, 3)) + r = self.no_auth_requests_call("GET", url, headers=headers, req_auth=req_auth) if raw: return r.text - # extract JSON content - match = re.search(r"novel:\s({.+}),\s+isOwnWork", r.text) - if not match or len(match.groups()) < 1: - msg = f"Extract novel content error: {r.text}" - raise PixivError(msg, header=r.headers, body=r.text) - - json_str = match.groups()[0].encode() - json_data = self.parse_json(json_str) - return self._load_model(json_data, models.WebviewNovel) + try: + # Use the existing parse_result method + json_data = self.parse_result(r) + + # Check for an API error + if "error" in json_data and json_data["error"]: + error_msg = json_data.get("message", "未知错误") + raise PixivError(f"API错误: {error_msg}") + + # Following the TS code, the data lives under "body" + if "body" not in json_data: + msg = f"AJAX API响应格式不正确: {list(json_data.keys())}" + raise PixivError(msg, header=r.headers, body=r.text) + + novel_data = json_data["body"] + if not novel_data: + raise PixivError("API返回空数据") + + # Return the raw data directly to avoid model validation problems. NOTE(review): `raw` already returned r.text above, so this branch looks unreachable + if raw: + return novel_data + + # Create a simplified result object holding the main fields + class SimpleNovelResult: + def __init__(self, data): + self.raw_data = data + self.title = data.get("title", "") + # Try several possible field names to get the novel text + self.text = ( + data.get("text", "") + or data.get("content", "") + or data.get("novelText", "") + ) + self.description = data.get("description", "") + self.author_name = data.get("authorName", "") or data.get( + "userName", "" + ) + self.create_date = data.get("createDate", "") + self.bookmark_count = data.get("bookmarkCount", 0) + self.comment_count = data.get("commentCount", 0) + self.total_view = data.get("totalView", 0) or data.get( + "viewCount", 0 + ) + + def __getattr__(self, name): + # Allow access to any field of the raw data; names missing from it yield None + return self.raw_data.get(name, None) + + return SimpleNovelResult(novel_data) + + except PixivError: + raise + except Exception as e: + msg = f"处理AJAX响应时出错: {e}" + raise PixivError(msg, 
header=r.headers, body=r.text) from e # 小说正文 (deprecated) def novel_text( diff --git a/pixivpy3/bapi.py b/pixivpy3/bapi.py index f97ac9e..fb4ae87 100644 --- a/pixivpy3/bapi.py +++ b/pixivpy3/bapi.py @@ -43,19 +43,38 @@ def require_appapi_hosts( response = requests.get( url, headers=headers, params=params, timeout=timeout ) - except (requests.exceptions.JSONDecodeError, KeyError): + response.raise_for_status() # Check the HTTP status code + + # Parse the JSON response + json_data = response.json() + + # Check the response format + if "Answer" not in json_data or not json_data["Answer"]: + logger.debug(f"No Answer field in response from '{url}'") + continue + + domain_data = json_data["Answer"][0]["data"] + self.hosts = f"https://{domain_data}" + logger.info( + f"Successfully resolved {hostname} to {domain_data} via {url}" + ) + return self.hosts + + except (requests.exceptions.JSONDecodeError, KeyError, IndexError) as e: logger.debug( - f"Unable to get according hostname info from '{url}', skipping...", + f"Unable to parse response from '{url}': {e}", exc_info=True, ) - except requests.ConnectionError: + except requests.ConnectionError as e: logger.debug( - f"Unable to establish connection to '{url}', skipping...", + f"Unable to establish connection to '{url}': {e}", + exc_info=True, + ) + except requests.RequestException as e: + logger.debug( + f"Request failed for '{url}': {e}", exc_info=True, ) - else: - domain_data = response.json()["Answer"][0]["data"] - self.hosts = f"https://{domain_data}" - return self.hosts + logger.warning(f"Failed to resolve {hostname} via any DoH service") return False diff --git a/pixivpy3/models.py b/pixivpy3/models.py index fa9a950..8ccbebb 100644 --- a/pixivpy3/models.py +++ b/pixivpy3/models.py @@ -278,26 +278,52 @@ class Config: alias_generator = to_camel allow_population_by_field_name = True - id: str - title: str - series_id: Optional[str] - series_title: Optional[str] - series_is_watched: Optional[bool] - user_id: str - cover_url: str - tags: List[str] - caption: str - cdate: 
str - rating: NovelRating - text: str - marker: Optional[str] - illusts: List[str] - images: List[str] - series_navigation: Union[NovelNavigationInfo, EmptyObject, None] - glossary_items: List[str] - replaceable_item_ids: List[str] - ai_type: int - is_original: bool + # Basic fields + id: Optional[str] = None + title: Optional[str] = None + series_id: Optional[str] = None + series_title: Optional[str] = None + series_is_watched: Optional[bool] = None + user_id: Optional[str] = None + cover_url: Optional[str] = None + tags: Optional[List[str]] = None + caption: Optional[str] = None + cdate: Optional[str] = None + rating: Optional[NovelRating] = None + text: Optional[str] = None + marker: Optional[str] = None + illusts: Optional[List[str]] = None + images: Optional[List[str]] = None + series_navigation: Optional[Union[NovelNavigationInfo, EmptyObject, None]] = None + glossary_items: Optional[List[str]] = None + replaceable_item_ids: Optional[List[str]] = None + ai_type: Optional[int] = None + is_original: Optional[bool] = None + + # Extra fields returned by the AJAX API + bookmark_count: Optional[int] = None + comment_count: Optional[int] = None + marker_count: Optional[int] = None + create_date: Optional[str] = None + upload_date: Optional[str] = None + description: Optional[str] = None + author_id: Optional[str] = None + author_name: Optional[str] = None + author_account: Optional[str] = None + author_profile_image_urls: Optional[dict] = None + is_bookmarked: Optional[bool] = None + is_followed: Optional[bool] = None + is_mypixiv_only: Optional[bool] = None + is_x_restricted: Optional[bool] = None + novel_ai_type: Optional[int] = None + restrict: Optional[int] = None + x_restrict: Optional[int] = None + total_view: Optional[int] = None + total_bookmarks: Optional[int] = None + total_comments: Optional[int] = None + visible: Optional[bool] = None + is_muted: Optional[bool] = None + comment_access_control: Optional[int] = None class UserBookmarksNovel(BasePixivpyModel): diff --git 
a/tests/test_read_novel.py b/tests/test_read_novel.py
new file mode 100644
index 0000000..7839e09
--- /dev/null
+++ b/tests/test_read_novel.py
@@ -0,0 +1,63 @@
+"""Live smoke test for ``ByPassSniApi.webview_novel``.
+
+The test talks to the real Pixiv service, so it only runs when credentials are
+supplied through the environment:
+
+* ``PIXIV_REFRESH_TOKEN``  - refresh token used to authenticate.
+* ``PIXIV_TEST_NOVEL_IDS`` - comma-separated novel ids to fetch.
+"""
+
+import os
+
+import pytest
+
+from pixivpy3 import ByPassSniApi
+
+REFRESH_TOKEN = os.environ.get("PIXIV_REFRESH_TOKEN", "")
+NOVEL_IDS = [
+    novel_id.strip()
+    for novel_id in os.environ.get("PIXIV_TEST_NOVEL_IDS", "").split(",")
+    if novel_id.strip()
+]
+
+
+@pytest.mark.skipif(
+    not (REFRESH_TOKEN and NOVEL_IDS),
+    reason="set PIXIV_REFRESH_TOKEN and PIXIV_TEST_NOVEL_IDS to run this live test",
+)
+def test_webview_novel():
+    """Fetch every configured novel and require a non-empty title and text."""
+    api = ByPassSniApi()
+
+    # Resolving the real IP is best-effort: on failure the default hosts are kept.
+    try:
+        hosts_result = api.require_appapi_hosts()
+    except Exception as e:
+        print(f"⚠️ DNS解析出错: {e},使用默认hosts")
+    else:
+        if hosts_result:
+            print(f"✓ 成功解析真实IP: {hosts_result}")
+        else:
+            print("⚠️ DNS解析失败,使用默认hosts")
+
+    api.set_accept_language("zh-CN,zh;q=0.9,en;q=0.8")
+
+    # Login and fetch errors propagate on purpose so pytest reports them as
+    # failures instead of printing a message and passing.
+    api.auth(refresh_token=REFRESH_TOKEN)
+
+    for novel_id in NOVEL_IDS:
+        result = api.webview_novel(novel_id=novel_id, raw=False, req_auth=True)
+
+        assert result.title, f"novel {novel_id}: empty title"
+        assert result.text, f"novel {novel_id}: empty text"
+
+        print(f"✓ 标题: {result.title}")
+        # The author name is optional on the result, so fall back to None.
+        print(f"✓ 作者: {getattr(result, 'author_name', None)}")
+        print(f"✓ 内容长度: {len(result.text)} 字符")
+
+
+if __name__ == "__main__":
+    # Run through pytest so the skip condition above is honoured.
+    raise SystemExit(pytest.main([__file__, "-s"]))