feat:
1. Optimize search result label styles and deduplicate API content
2. Improve backend concurrency
   - Add copyright information
3. Optimize search result sorting rules
4. Add caching functionality and related configuration
5. Update documentation
This commit is contained in:
parent 49855b7b7c
commit 474b0a3041
@@ -2,13 +2,13 @@ FROM python:3.13-slim

# Set the working directory
WORKDIR /app
COPY . /app

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends nginx
RUN pip install --no-cache-dir beautifulsoup4 fastapi requests uvicorn hydra-core curl_cffi schedule
RUN pip3 install -r requirements.txt

# Copy the application code
COPY . /app

# Adjust the Nginx configuration
RUN rm -rf /etc/nginx/sites-enabled/default && cp /app/nginx.example.conf /etc/nginx/sites-enabled/default

CMD ["sh", "-c", "python3 main.py & nginx -g 'daemon off;'"]
@@ -2,9 +2,9 @@
<img src="web\imgs\logo_opaque.png" alt="FTP Web Client Logo">
</div>

# AvHub - R18 Resource Search & Management Tool

**AvHub** is a web platform dedicated to the retrieval and management of adult video resources.

Cloudflare Page: https://avhub.pages.dev/

@@ -55,7 +55,7 @@ python main.py
```
The default API address: `http://127.0.0.1:8000/`

You can configure a reverse proxy and domain, replacing `BASE_URL` in line 52 of `web/script.js`.
You can configure a reverse proxy and domain, replacing `BASE_URL` in line 38 of `web/script.js`.

The backend configuration file is located in `data/config.yaml`. Modify it according to your actual needs.
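A minimal sketch of querying the service once it is running (endpoint paths come from `main.py` in this commit; the host and port are the defaults above, and `ABC-123` is only a placeholder code):

```python
import requests

BASE = "http://127.0.0.1:8000"

# Magnet search for a code, served by /v1/avcode/{code_str}; the server strips
# separators itself, so "ABC-123" and "abc123" resolve to the same entry.
resp = requests.get(f"{BASE}/v1/avcode/ABC-123", timeout=30)
print(resp.json())  # {"status": "succeed", "data": [...]}

# Random video URL plus cover image, served by /v1/get_video
print(requests.get(f"{BASE}/v1/get_video", timeout=30).json())  # {"url": ..., "img_url": ...}
```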
@@ -2,9 +2,9 @@
<img src="web\imgs\logo_opaque.png" alt="FTP Web Client Logo">
</div>

# AvHub - Adult Video Resource Management Platform
# AvHub - R18 Resource Search & Management Tool

**AvHub** is a web platform focused on the retrieval and management of adult video resources
**AvHub** is a web platform dedicated to retrieving and managing adult video resources

Cloudflare Page: https://avhub.pages.dev/

@@ -56,7 +56,7 @@ python main.py
```
The default API address: `http://127.0.0.1:8000/`

You can configure a reverse proxy and a domain by replacing `BASE_URL` on line 52 of `web/script.js`
You can configure a reverse proxy and a domain by replacing `BASE_URL` on line 38 of `web/script.js`

The backend configuration file is `data/config.yaml`; adjust it to your actual setup
@@ -13,9 +13,11 @@ av_spider:
  source_url: "https://missav.ai/cn/search/"
  proxy_url: "http://192.168.50.3:7890" # http or socks5 proxy
  use_proxy: false
  use_cache: true # whether to enable the result cache
  cache_dir: "/app/data/.av" # cache directory path

hacg_spider:
  source_url: "https://www.hacg.mov/wp/"
  source_url: "https://www.hacg.mov"

logging:
  log_file: "main.log"
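For reference, a minimal sketch of how these keys are consumed. `main.py` receives the same file as a `DictConfig` through `@hydra.main(config_path='data/', config_name='config')`; the standalone `OmegaConf.load` call here is only for illustration:

```python
from omegaconf import OmegaConf

# Load the same file that @hydra.main hands to main(cfg)
cfg = OmegaConf.load("data/config.yaml")

if cfg.av_spider.use_cache:
    # Cached magnet lists are written per normalized code as <cache_dir>/<code>.json
    print("cache dir:", cfg.av_spider.cache_dir)

# The spider only uses proxy_url when use_proxy is true
proxy = cfg.av_spider.proxy_url if cfg.av_spider.use_proxy else None
print("effective proxy:", proxy)
```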
164 main.py
@@ -14,19 +14,28 @@ import hydra
from utils.logger import setup_logger
import schedule
import time
from contextlib import asynccontextmanager
import pathlib
import re
from concurrent.futures import ThreadPoolExecutor
import asyncio

@hydra.main(config_path='data/', config_name='config', version_base=None)
def main(cfg: DictConfig):
    # Initialize the logger
    logger = setup_logger(cfg)

    app = FastAPI()

    @app.on_event("startup")
    async def startup_event():
        global logger
        logger = setup_logger(cfg)

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # Actions before startup
        logger.info("Application startup")
        yield
        # Actions on shutdown
        logger.info("Application shutdown")

    app = FastAPI(lifespan=lifespan)

    app.add_middleware(
        CORSMiddleware,
        allow_origins=cfg.app.cors_origins,
@@ -35,49 +44,94 @@ def main(cfg: DictConfig):
        allow_headers=cfg.app.cors_headers,
    )

    def get_image_url(video_url: str) -> str:
    # Create the thread pool
    executor = ThreadPoolExecutor(max_workers=10)

    def _fetch_url(url: str) -> str:
        """Fetch the content of a URL."""
        try:
            response = requests.get(url, timeout=10)  # reduce the timeout to 10 seconds
            response.raise_for_status()
            return response.text
        except Exception as e:
            logger.error(f"Failed to fetch URL {url}: {str(e)}")
            return ""

    def _parse_html(html_content: str, image_dir_url: str) -> list:
        """Parse the HTML content and extract links."""
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            a_tags = soup.find_all('a', href=True)
            links = [image_dir_url + tag['href'] for tag in a_tags if tag['href'] != '../']
            return [link for link in links if link.endswith('.webp')] or links
        except Exception as e:
            logger.error(f"Failed to parse HTML: {str(e)}")
            return []

    async def get_image_url(video_url: str) -> str:
        """Fetch the image URL asynchronously."""
        try:
            # Build the image directory URL
            image_dir_url = video_url.replace('index.m3u8', 'image/')

            # Request the directory listing
            response = requests.get(image_dir_url, timeout=20)  # set a timeout to avoid long waits
            response.raise_for_status()  # raise HTTPError if the status code is not 200
            # Future bounded by a 15-second timeout
            loop = asyncio.get_event_loop()
            html_content = await asyncio.wait_for(
                loop.run_in_executor(executor, _fetch_url, image_dir_url),
                timeout=15
            )

            # Parse the HTML and extract links
            soup = BeautifulSoup(response.text, 'html.parser')
            a_tags = soup.find_all('a', href=True)  # only <a> tags that have an href attribute
            if not html_content:
                return None

            # Split out .webp links from the rest and drop the parent-directory link
            links = [image_dir_url + tag['href'] for tag in a_tags if tag['href'] != '../']
            webp_links = [link for link in links if link.endswith('.webp')]
            # HTML parsing bounded by a 5-second timeout
            links = await asyncio.wait_for(
                loop.run_in_executor(executor, _parse_html, html_content, image_dir_url),
                timeout=5
            )

            # Prefer .webp links; otherwise pick randomly from the remaining links
            if not links:
                logger.warning("No image links found.")
                return None
            return random.choice(webp_links or links)

            return random.choice(links)
        except asyncio.TimeoutError:
            logger.error(f"Timeout while processing image URL for {video_url}")
            return None
        except Exception as e:
            logger.error(f"Failed to obtain the image URL: {str(e)}")
            return None

    def read_random_line(file_path: str) -> tuple[str, str]:
        """Reads a random line from a given file and returns video URL and image URL."""
    async def read_random_line(file_path: str) -> tuple[str, str]:
        """Read a random line asynchronously and fetch the matching image URL."""
        if not os.path.isfile(file_path):
            logger.error("File not found")
            raise HTTPException(status_code=404, detail="File not found")

        with open(file_path, 'r') as file:
            lines = file.readlines()
        try:
            loop = asyncio.get_event_loop()
            # File read bounded by a 2-second timeout
            lines = await asyncio.wait_for(
                loop.run_in_executor(executor, lambda: open(file_path, 'r').readlines()),
                timeout=2
            )

            if not lines:
                logger.error("File is empty")
                raise HTTPException(status_code=400, detail="File is empty")

            random_line = random.choice(lines).strip()
            img_url = get_image_url(random_line)
            # Fetch the image URL with an overall 20-second timeout
            img_url = await asyncio.wait_for(get_image_url(random_line), timeout=20)

            return random_line, img_url
        except asyncio.TimeoutError:
            logger.error("Timeout while reading random line or fetching image URL")
            # On timeout, return the video URL without an image URL
            return random.choice(lines).strip() if lines else None, None
        except Exception as e:
            logger.error(f"Error in read_random_line: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))

    @app.get("/v1/hacg")
    async def read_hacg():
@@ -92,36 +146,80 @@ def main(cfg: DictConfig):

    @app.get("/v1/avcode/{code_str}")
    async def crawl_av(code_str: str):
        # Normalize code_str: keep letters and digits only
        code_str = re.sub(r'[^a-zA-Z0-9]', '', code_str).lower()

        # If caching is enabled, make sure the cache directory exists and try to read from it
        if cfg.av_spider.use_cache:
            # Ensure the cache directory exists
            pathlib.Path(cfg.av_spider.cache_dir).mkdir(parents=True, exist_ok=True)

            cache_path = os.path.join(cfg.av_spider.cache_dir, f"{code_str}.json")
            try:
                if os.path.exists(cache_path):
                    with open(cache_path, 'r', encoding='utf-8') as f:
                        cached_data = json.load(f)
                    logger.info(f"Cache hit for AV code: {code_str}")
                    return {"status": "succeed", "data": cached_data}
            except Exception as e:
                logger.error(f"Error reading cache file: {str(e)}")

        # No cache, or the cache read failed: fetch from the network
        crawler = AVSpider(av_code=code_str,
                           source_url=cfg.av_spider.source_url,
                           proxy_url=cfg.av_spider.proxy_url,
                           use_proxy=cfg.av_spider.use_proxy,
                           cfg=cfg)
        video_links = crawler.get_video_url()
        all_magnet_links = []

        for link in video_links:
            magnet_links = crawler.get_magnet_links(link)
            all_magnet_links.extend(magnet_links)
        try:
            magnet_links = await crawler.process_av_code()

            if not all_magnet_links:
                logger.error("No magnet links found for AV code: %s", code_str)
            if not magnet_links:
                logger.error(f"No magnet links found for AV code: {code_str}")
                raise HTTPException(status_code=404, detail="No magnet links found")

            logger.info("Magnet links found for AV code: %s", code_str)
            return {"status": "succeed", "data": [str(item) for item in all_magnet_links]}
            # Prepare the data
            magnet_data = [str(item) for item in magnet_links]

            # If caching is enabled, save the data portion to the cache file
            if cfg.av_spider.use_cache:
                try:
                    with open(cache_path, 'w', encoding='utf-8') as f:
                        json.dump(magnet_data, f, ensure_ascii=False, indent=4)
                    logger.info(f"Cache written for AV code: {code_str}")
                except Exception as e:
                    logger.error(f"Error writing cache file: {str(e)}")

            logger.info(f"Magnet links found for AV code: {code_str}")
            return {"status": "succeed", "data": magnet_data}
        except Exception as e:
            logger.error(f"Error processing AV code {code_str}: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
        finally:
            del crawler  # make sure resources are released

    @app.get("/v1/get_video")
    async def get_random_video_url():
        """Returns a random video URL and its corresponding image URL."""
        try:
            file_path = cfg.files.video_urls_txt_path
            video_url, img_url = read_random_line(file_path)
            # Overall operation timeout of 25 seconds
            video_url, img_url = await asyncio.wait_for(
                read_random_line(file_path),
                timeout=25
            )

            if not video_url:
                raise HTTPException(status_code=500, detail="Failed to get video URL")

            logger.info("Random video URL and image URL fetched successfully")
            return {
                "url": video_url,
                "img_url": img_url or ""
            }
        except asyncio.TimeoutError:
            logger.error("Global timeout in get_random_video_url")
            raise HTTPException(status_code=504, detail="Request timeout")
        except Exception as e:
            logger.error(f"Failed to fetch random video URL: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
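The concurrency change above repeats one pattern: blocking I/O is pushed onto a shared `ThreadPoolExecutor` and bounded with `asyncio.wait_for`. A stripped-down sketch of that pattern in isolation (the `fetch` helper and the 10-second budget are illustrative, not taken from the repository):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

import requests

executor = ThreadPoolExecutor(max_workers=10)

def fetch(url: str) -> str:
    # Blocking call, safe to run in a worker thread
    return requests.get(url, timeout=10).text

async def fetch_bounded(url: str) -> str | None:
    loop = asyncio.get_event_loop()
    try:
        # Offload the blocking request, then cap the total wait from the event loop's side
        return await asyncio.wait_for(loop.run_in_executor(executor, fetch, url), timeout=10)
    except asyncio.TimeoutError:
        return None

# asyncio.run(fetch_bounded("http://example.com"))
```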
@@ -1,6 +1,10 @@
aiohappyeyeballs==2.6.1
aiohttp==3.11.13
aiosignal==1.3.2
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.8.0
attrs==25.3.0
beautifulsoup4==4.13.3
certifi==2025.1.31
cffi==1.17.1
@@ -8,11 +12,14 @@ charset-normalizer==3.4.1
click==8.1.8
curl_cffi==0.9.0
fastapi==0.115.11
frozenlist==1.5.0
h11==0.14.0
hydra-core==1.3.2
idna==3.10
multidict==6.1.0
omegaconf==2.3.0
packaging==24.2
propcache==0.3.0
pycparser==2.22
pydantic==2.10.6
pydantic_core==2.27.2
@@ -25,3 +32,4 @@ starlette==0.46.1
typing_extensions==4.12.2
urllib3==2.3.0
uvicorn==0.34.0
yarl==1.18.3
197 utils/spider.py
@@ -2,10 +2,15 @@
import re
import json
import os
import asyncio
import aiohttp
from concurrent.futures import ThreadPoolExecutor
from bs4 import BeautifulSoup
from curl_cffi import requests
from omegaconf import DictConfig
from utils.logger import setup_logger
from typing import List, Set, Dict, Any
from aiohttp import ClientTimeout

class AVSpider:
    def __init__(self, av_code, source_url, proxy_url, use_proxy, cfg: DictConfig):
@@ -13,21 +18,87 @@ class AVSpider:
        self.av_code = av_code.lower()
        self.proxy_url = proxy_url if use_proxy else None
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
            'Content-Type': 'application/json'
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
        }
        self.proxies = {
            "http": self.proxy_url,
            "https": self.proxy_url
        } if self.proxy_url else {}
        self.logger = setup_logger(cfg)
        self.executor = ThreadPoolExecutor(max_workers=10)

    def get_video_url(self) -> list:
        """
        Get the links to the video pages.
    def _fetch_url(self, url: str) -> str:
        """Fetch URL content with curl_cffi."""
        try:
            response = requests.get(
                url,
                proxies=self.proxies,
                headers=self.headers,
                impersonate="chrome110",
                timeout=30
            )
            response.raise_for_status()
            return response.text
        except Exception as e:
            self.logger.error(f"Error fetching {url}: {str(e)}")
            return ""

        :return: A list of video page links.
        """
    def _parse_video_page(self, html_content: str, code_str: str) -> Set[str]:
        """Parse a video page in the thread pool."""
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            unique_links = set()
            for a_tag in soup.find_all('a'):
                alt_text = a_tag.get('alt')
                if alt_text and code_str in alt_text:
                    href = a_tag.get('href')
                    if href:
                        unique_links.add(href)
            return unique_links
        except Exception as e:
            self.logger.error(f"Error parsing video page: {str(e)}")
            return set()

    def _parse_magnet_page(self, html_content: str) -> List[List[str]]:
        """Parse a magnet-link page in the thread pool."""
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
            target_table = soup.find('table', class_='min-w-full')
            result = []

            if target_table is not None:
                rows = target_table.find_all('tr')
                for row in rows:
                    cols = row.find_all('td')
                    data = []
                    for col in cols:
                        links = col.find_all('a', rel='nofollow')
                        if links:
                            for l in links:
                                href = l['href']
                                if "keepshare.org" not in href:
                                    data.append(href)
                        text = col.get_text(strip=True)
                        if text != "下载" and "keepshare.org" not in text:
                            data.append(text)
                    if data:
                        result.append(data)
            return result
        except Exception as e:
            self.logger.error(f"Error parsing magnet page: {str(e)}")
            return []

    async def get_video_url(self) -> List[str]:
        """Get the links to the video pages."""
        code_str = self.av_code.replace('-', '')
        match = re.match(r'([a-zA-Z]+)(\d+)', code_str)
        if not match:
@@ -37,73 +108,69 @@ class AVSpider:
        letters, digits = match.groups()
        code_str = f"{letters.lower()}-{digits}"
        url = f"{self.source_url}{code_str}"
        try:
            response = requests.get(url, proxies=self.proxies, headers=self.headers)
            response.raise_for_status()
        except requests.RequestException as e:
            self.logger.error(f"Request Error: {e}")

        # Run the synchronous request in the thread pool
        loop = asyncio.get_event_loop()
        html_content = await loop.run_in_executor(self.executor, self._fetch_url, url)

        if not html_content:
            return []

        html_content = response.text

        soup = BeautifulSoup(html_content, 'html.parser')
        unique_links = set()

        for a_tag in soup.find_all('a'):
            alt_text = a_tag.get('alt')
            if alt_text and code_str in alt_text:
                href = a_tag.get('href')
                if href:
                    unique_links.add(href)

        self.logger.info(f"Found video URLs: {unique_links}")
        # Parse the HTML in the thread pool
        unique_links = await loop.run_in_executor(
            self.executor,
            self._parse_video_page,
            html_content,
            code_str
        )

        self.logger.info(f"Found {len(unique_links)} video URLs")
        return list(unique_links)

    def get_magnet_links(self, link: str) -> list:
        """
        Extract magnet links from a video page.
    async def get_magnet_links(self, links: List[str]) -> List[List[str]]:
        """Collect all magnet links."""
        loop = asyncio.get_event_loop()
        tasks = []

        :param link: The URL of the video page.
        :return: A list of magnet links.
        """
        # Create the tasks that fetch each page's content
        for link in links:
            task = loop.run_in_executor(self.executor, self._fetch_url, link)
            tasks.append(task)

        # Wait for all page contents to arrive
        html_contents = await asyncio.gather(*tasks)

        # Parse every page in the thread pool
        parse_tasks = [
            loop.run_in_executor(self.executor, self._parse_magnet_page, content)
            for content in html_contents if content
        ]
        results = await asyncio.gather(*parse_tasks)

        # Merge all results
        all_results = []
        for result in results:
            all_results.extend(result)

        self.logger.info(f"Found {len(all_results)} magnet links")
        return all_results

    async def process_av_code(self) -> List[List[str]]:
        """Main entry point that processes an entire AV code."""
        try:
            response = requests.get(link, proxies=self.proxies, headers=self.headers)
            response.raise_for_status()
        except requests.RequestException as e:
            self.logger.error(f"Request Error: {e}")
            video_links = await self.get_video_url()
            if not video_links:
                return []

            html_content = response.text

            soup = BeautifulSoup(html_content, 'html.parser')
            target_table = soup.find('table', class_='min-w-full')

            result = []
            if target_table is not None:
                rows = target_table.find_all('tr')
                for row in rows:
                    cols = row.find_all('td')
                    data = []

                    for col in cols:
                        links = col.find_all('a', rel='nofollow')
                        if links:
                            for l in links:
                                href = l['href']
                                if "keepshare.org" not in href:
                                    data.append(href)

                        text = col.get_text(strip=True)
                        if text != "下载" and "keepshare.org" not in text:
                            data.append(text)

                    result.append(data)

            self.logger.info(f"Magnet links extracted from {link}")

            return result
            magnet_links = await self.get_magnet_links(video_links)
            return magnet_links
        except Exception as e:
            self.logger.error(f"Error processing AV code {self.av_code}: {str(e)}")
            return []

    def __del__(self):
        """Make sure the thread pool is shut down."""
        self.executor.shutdown(wait=False)

class HacgSpider:
    def __init__(self, url, filepath, cfg: DictConfig):
@@ -134,7 +201,7 @@ class HacgSpider:
        return pages

    def get_links(self, page):
        url = f'{self.url}page/{page}?s=%E5%90%88%E9%9B%86&submit=%E6%90%9C%E7%B4%A2'
        url = f'{self.url}/wp/page/{page}?s=%E5%90%88%E9%9B%86&submit=%E6%90%9C%E7%B4%A2'
        try:
            response = requests.get(url)
            response.raise_for_status()
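A minimal sketch of driving the reworked spider on its own, outside FastAPI (the constructor arguments mirror how `crawl_av` builds it in `main.py`; the config load and the example code `abc123` are illustrative only):

```python
import asyncio

from omegaconf import OmegaConf
from utils.spider import AVSpider

cfg = OmegaConf.load("data/config.yaml")
crawler = AVSpider(av_code="abc123",  # hypothetical code, for illustration only
                   source_url=cfg.av_spider.source_url,
                   proxy_url=cfg.av_spider.proxy_url,
                   use_proxy=cfg.av_spider.use_proxy,
                   cfg=cfg)

# process_av_code() fetches the video pages and gathers magnet links concurrently
magnets = asyncio.run(crawler.process_av_code())
print(len(magnets), "magnet rows")
```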
@@ -98,11 +98,11 @@
<span class="ml-2 tab-text" data-zh="显示封面" data-en="Show Cover">显示封面</span>
</label>
</div>
<button id="sortButton" class="settings-button theme-toggle" onclick="showSortMenu(this)" value="date-desc">
<button id="sortButton" class="settings-button theme-toggle" onclick="showSortMenu(this)" value="tags-desc">
<svg class="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
<path d="M3 3a1 1 0 000 2h11a1 1 0 100-2H3zM3 7a1 1 0 000 2h7a1 1 0 100-2H3zM3 11a1 1 0 100 2h4a1 1 0 100-2H3z"/>
<path d="M7 7a1 1 0 011 1v3h3a1 1 0 110 2H8v3a1 1 0 11-2 0v-3H3a1 1 0 110-2h3V8a1 1 0 011-1zm7-4a1 1 0 100 2h3a1 1 0 100-2h-3zM7 3a1 1 0 000 2h3a1 1 0 000-2H7zM4 7a1 1 0 100 2h3a1 1 0 000-2H4zm0 4a1 1 0 100 2h3a1 1 0 000-2H4z"/>
</svg>
<span class="ml-2">最新日期</span>
<span class="ml-2">标签最多</span>
</button>
</div>

@@ -189,5 +189,15 @@
</div>
</div>
</template>

<!-- Add copyright information -->
<footer class="text-center py-4 text-gray-400 text-xs mt-8">
<p>
<span class="tab-text" data-zh="版权所有" data-en="Copyright">版权所有</span> © 2024
<a href="https://github.com/levywang/avhub" target="_blank" class="text-primary hover:text-primary-hover transition-colors duration-200">
AvHub
</a>
</p>
</footer>
</body>
</html>
161 web/script.js
@@ -46,147 +46,82 @@ const API_CONFIG = {
// Search for magnet links
async function searchMagnet() {
    const input = document.getElementById('searchInput');
    const resultsDiv = document.getElementById('searchResults');
    const searchTerm = input.value.replace(/\s+/g, '').trim();
    const notification = document.getElementById('notification');
    const container = document.getElementById('coverImageContainer');
    const regex = /^[A-Za-z][\w\s-]*\d$/;
    if (!searchTerm || !regex.test(searchTerm)) {
        // Warning notification for an empty search
        notification.innerHTML = `
            <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
            </svg>
            <span>${translations[currentLang].emptySearchWarning}</span>
        `;
        notification.style.background = '#dc2626'; // red background
        notification.style.background = '#dc2626';
        notification.classList.add('show');
        if (container) {
            container.classList.add('hidden');
        }
        setTimeout(() => {
            notification.classList.remove('show');
            notification.style.background = ''; // reset the background to its default
            notification.style.background = '';
        }, 3000);
        return;
    }
    // Hide the previous image and search results
    if (container) {
        container.classList.add('hidden');
        container.style.opacity = '0';
    }
    resultsDiv.innerHTML = '';
    // Show the loading animation
    const loadingTemplate = document.getElementById('loadingTemplate');
    resultsDiv.innerHTML = loadingTemplate.innerHTML;
    setLanguage(currentLang); // refresh the language of the loading text
    setLanguage(currentLang);
    try {
        const response = await fetch(`${API_CONFIG.BASE_URL}${API_CONFIG.ENDPOINTS.SEARCH}/${searchTerm}`);
        const data = await response.json();
        if (Array.isArray(data.data) && data.data.length > 0) {
            // Show the search results first
            // Parse and filter out invalid results
            const formattedResults = data.data.map(result => {
                if (Array.isArray(result)) {
                    return result;
                }
                // If the result is a string, try to parse it
                try {
                    return JSON.parse(result.replace(/'/g, '"'));
                } catch (e) {
                    console.error('Failed to parse result:', e);
                    return null;
                }
            }).filter(result => result !== null);
            displaySearchResults(formattedResults);
            // Wait for the results to render before showing the image
            // Deduplicate the results
            const uniqueResults = formattedResults.filter((result, index, self) => {
                // Use the magnet link as the unique key
                const magnet = result[0];
                return index === self.findIndex(r => r[0] === magnet);
            });
            displaySearchResults(uniqueResults);
            setTimeout(() => showCoverImage(searchTerm), 300);
        } else {
            resultsDiv.innerHTML = `<p class="text-center text-inherit opacity-75">${translations[currentLang].noResults}</p>`;
            // Hide the image when there are no results
            if (container) {
                container.classList.add('hidden');
            }
        }
    } catch (error) {
        console.error('Search failed:', error);
        resultsDiv.innerHTML = `<p class="text-center text-inherit opacity-75">${translations[currentLang].searchError}</p>`;
        // Hide the image when the search fails
        if (container) {
            container.classList.add('hidden');
        }
    }
}
// Display the search results
@@ -212,52 +147,34 @@ function displaySearchResults(results) {
    }
    const html = results.map(([magnet, title, size, date]) => {
        const tags = extractTags(title);
        const tagsHtml = tags.map(tag => {
            return `<div class="tag" data-type="${tag.type}">${getTagLabel(tag.type)}</div>`;
        }).join('');
        return `
            <div class="magnet-item p-6 rounded-xl">
                <div class="flex flex-col gap-4">
                    <h3 class="font-medium text-inherit break-all"><a rel="nofollow" href="${magnet}" target="_blank" onclick="return false;">${title}</a></h3>
                    <div class="flex flex-wrap gap-2">
                        ${tagsHtml}
                    </div>
                    <p class="text-sm text-inherit opacity-75">
                        ${translations[currentLang].size}: ${size} | ${translations[currentLang].date}: ${date}
                    </p>
                    <button onclick="copyToClipboard('${magnet}')"
                        class="copy-button w-full px-4 py-2 rounded-lg text-sm font-medium text-white">
                        ${translations[currentLang].copyButton}
                    </button>
                </div>
            </div>
        `;
    }).join('');
    searchResults.innerHTML = html;
    // Added so the results are sorted by tag count
    sortResults('tags-desc');
}
// Show the cover image
@@ -729,6 +646,12 @@ const THEMES = {

// Sort configuration
const SORT_OPTIONS = {
    'tags-desc': {
        icon: `<svg class="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
            <path d="M18.045 3.007 12.31 3a1.965 1.965 0 0 0-1.4.585l-7.33 7.394a2 2 0 0 0 0 2.805l6.573 6.631a1.957 1.957 0 0 0 1.4.585 1.965 1.965 0 0 0 1.4-.585l7.409-7.477A2 2 0 0 0 21 11.479v-5.5a2.972 2.972 0 0 0-2.955-2.972Zm-2.452 6.438a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"/>
        </svg>`,
        label: { zh: '标签最多', en: 'Most Tags' }
    },
    'date-desc': {
        icon: `<svg class="w-4 h-4" fill="currentColor" viewBox="0 0 20 20">
            <path d="M3 3a1 1 0 000 2h11a1 1 0 100-2H3zM3 7a1 1 0 000 2h7a1 1 0 100-2H3zM3 11a1 1 0 100 2h4a1 1 0 100-2H3z"/>
@@ -1210,7 +1133,13 @@ function sortResults(sortType) {
    const [aSize, aDate] = aInfo.split('|').map(str => str.split(':')[1].trim());
    const [bSize, bDate] = bInfo.split('|').map(str => str.split(':')[1].trim());

    // Get the tag counts
    const aTagCount = a.querySelectorAll('.tag').length;
    const bTagCount = b.querySelectorAll('.tag').length;

    switch (sortType) {
        case 'tags-desc':
            return bTagCount - aTagCount;
        case 'date-desc':
            return new Date(bDate || 0) - new Date(aDate || 0);
        case 'date-asc':
@@ -1220,7 +1149,7 @@ function sortResults(sortType) {
        case 'size-asc':
            return parseFileSize(aSize) - parseFileSize(bSize);
        default:
            return 0;
            return bTagCount - aTagCount; // sort by tag count by default
    }
} catch (error) {
    console.error('Sort comparison error:', error);
@@ -1257,7 +1186,7 @@ function displayCollections(collections) {
        `;
        collectionList.appendChild(collectionItem);
    });
} else if (collections && typeof collections === 'object') {
} else if (typeof collections === 'object' && collections !== null) {
    // Handle object-shaped data
    Object.entries(collections).forEach(([title, link]) => {
        const collectionItem = document.createElement('div');
@@ -1295,26 +1224,50 @@ function getTagLabel(type) {
function extractTags(title) {
    const tags = [];
    const tagMap = {
        // HD tags
        'HD': {type: 'hd', priority: 1},
        'FHD': {type: 'hd', priority: 1},
        '高清': {type: 'hd', priority: 1},

        // Subtitle tags
        '字幕': {type: 'subtitle', priority: 2},
        '-C': {type: 'subtitle', priority: 2},
        'sub': {type: 'subtitle', priority: 2},
        'SUB': {type: 'subtitle', priority: 2},

        // Uncensored tags
        '無修正': {type: 'uncensored', priority: 3},
        '无码': {type: 'uncensored', priority: 3},
        'uncensored': {type: 'uncensored', priority: 3},

        // Chinese-language tags
        '中文': {type: 'chinese', priority: 4},
        'ch': {type: 'chinese', priority: 4},
        'CH': {type: 'chinese', priority: 4},
        'chinese': {type: 'chinese', priority: 4},

        // Leak tags
        '破解': {type: 'leak', priority: 5},
        'leak': {type: 'leak', priority: 5}
        'leak': {type: 'leak', priority: 5},
        'LEAK': {type: 'leak', priority: 5}
    };

    // Lowercase the title for case-insensitive matching
    const lowerTitle = title.toLowerCase();

    // Use a Set to record tag types that were already added, avoiding duplicates
    const addedTypes = new Set();

    // Walk every keyword and try to match it
    Object.entries(tagMap).forEach(([keyword, {type, priority}]) => {
        if (title.toLowerCase().includes(keyword.toLowerCase())) {
            if (!tags.find(t => t.type === type)) {
        // Add the tag only if this type has not been added yet and the title contains the keyword
        if (!addedTypes.has(type) && lowerTitle.includes(keyword.toLowerCase())) {
            tags.push({type, priority});
        }
        addedTypes.add(type);
        }
    });

    // Sort by priority
    return tags.sort((a, b) => a.priority - b.priority);
}

@@ -1429,7 +1382,7 @@ function showSortMenu(button) {
    return;
}

const currentSort = button.value;
const currentSort = button.value || 'tags-desc'; // default to tag sorting
const sortMenu = document.createElement('div');
sortMenu.className = 'sort-menu';
@@ -1461,3 +1461,32 @@ body.light-theme select option {
[data-theme="light"] #coverToggle:checked + label .toggle-switch {
    background-color: var(--primary-color);
}

/* Footer styles */
footer {
    position: fixed;
    bottom: 0;
    left: 0;
    width: 100%;
    background: var(--card-dark);
    border-top: 1px solid var(--border-color);
    z-index: 30;
    padding: 1rem 0;
}

/* Bottom padding on the main content so the footer does not cover it */
main {
    padding-bottom: 80px;
}

/* Footer styles for the light theme */
[data-theme="light"] footer {
    background: #ffffff;
    border-color: #e5e7eb;
}

/* Keep the back-to-top button above the footer */
.back-to-top {
    z-index: 50;
    margin-bottom: 60px;
}