import sqlite3, re, requests, sys
from pypinyin import lazy_pinyin, Style
from bs4 import BeautifulSoup
# TMDB API key placeholder (the Chinese text says "get the API key from TMDB").
# NOTE(review): API_KEY is never referenced in this file — scraping goes through
# the public HTML pages instead; confirm whether it is still needed.
API_KEY = "去TMDB获取API"
# SQLite database of the trim.media app whose 'item' table is updated below.
DB_PATH = "/usr/local/apps/@appdata/trim.media/database/trimmedia.db"
def convert_to_pinyin(text):
    """Transliterate the CJK runs of *text* into space-separated pinyin.

    The string is split into maximal runs of Han characters and non-Han
    characters; Han runs are converted with pypinyin's lazy_pinyin (plain
    style, no tone marks) and joined with spaces, everything else is kept
    verbatim. Used to build a latin sort key for Chinese episode titles.
    """
    def _render(chunk):
        # A chunk is either entirely Han or entirely non-Han by construction.
        if re.fullmatch(r'[\u4e00-\u9fff]+', chunk):
            return ' '.join(lazy_pinyin(chunk, style=Style.NORMAL))
        return chunk

    chunks = re.findall(r'[\u4e00-\u9fff]+|[^\u4e00-\u9fff]+', text)
    return ''.join(_render(chunk) for chunk in chunks)
def truncate_string(s):
    """Return *s* shortened to at most 10 characters for log output.

    An ellipsis is appended only when the string was actually cut.
    (Bug fix: the original appended '...' unconditionally, so even a short,
    complete overview was displayed as if it had been truncated.)
    """
    return s[:10] + '...' if len(s) > 10 else s
def doScrape():
    """Backfill episode titles and overviews in the trim.media DB from TMDB.

    Selects 'Episode' rows whose title is still the '第 N 集' placeholder or
    whose overview is empty (and which have a real media file), scrapes the
    matching TMDB season page, and writes the scraped title, a pinyin sort
    title, and the overview back to the database.

    Exits the process with status 1 when not a single row was updated.
    """
    conn = sqlite3.connect(DB_PATH)
    conn.isolation_level = None  # autocommit mode
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT a.guid,
                   a.tmdb_id,
                   a.season_number,
                   a.episode_number,
                   a.title,
                   a.overview,
                   (SELECT b.title FROM item b WHERE a.tmdb_id = b.tmdb_id AND b.TYPE = 'TV')
              FROM item a
             WHERE tmdb_id IN (SELECT DISTINCT tmdb_id
                                 FROM item
                                WHERE tmdb_id IS NOT NULL
                                  AND TYPE = 'Episode'
                                  AND ((title = '第 ' || episode_number || ' 集') OR (overview = ''))
                                  AND tmdb_id != 0
                                ORDER BY tmdb_id, episode_number)
               AND TYPE = 'Episode'
               AND guid IN (SELECT item_guid
                              FROM item_media
                             WHERE file_birth_time > 0)
             ORDER BY tmdb_id, episode_number
        """)
        season_keys = []   # unique "tmdb_id~season" keys, first-seen order
        row_by_key = {}    # "tmdb_id~season~episode" -> DB row
        for row in cursor.fetchall():
            season_key = f"{row[1]}~{row[2]}"
            row_by_key[f"{season_key}~{row[3]}"] = row
            if season_key not in season_keys:
                season_keys.append(season_key)
        nothing_updated = True
        for season_key in season_keys:
            tmdb_id, season_number = season_key.split("~")
            url = f"https://www.themoviedb.org/tv/{tmdb_id}/season/{season_number}"
            for episode in getInfoByHtml(url):
                try:
                    episode_key = f"{season_key}~{episode.get('episode_number')}"
                    if episode_key not in row_by_key:
                        continue
                    real_data = row_by_key[episode_key]
                    name = episode.get('name')
                    overview = episode.get('overview')
                    # mk: 0 = unchanged, 1 = title differs, 2 = overview
                    # differs, 3 = both differ.
                    mk = 0
                    if name != real_data[4] and overview != real_data[5]:
                        mk = 3
                    elif name != real_data[4]:
                        mk = 1
                    elif overview != real_data[5]:
                        mk = 2
                    if mk != 0:
                        nothing_updated = False
                        if mk == 3:
                            print(
                                f"✅ {real_data[6]}第{real_data[2]}季第{real_data[3]}集的标题及描述更新成功! 🏅标题: {real_data[4]} ==> {episode.get('name')}, 🏆描述: {truncate_string(episode.get('overview'))}")
                        elif mk == 1:
                            print(
                                f"✅ {real_data[6]}第{real_data[2]}季第{real_data[3]}集的标题更新成功! 🏅标题: {real_data[4]} ==> {episode.get('name')}")
                        elif mk == 2:
                            print(
                                f"✅ {real_data[6]}第{real_data[2]}季第{real_data[3]}集的描述更新成功! 🏅描述: {truncate_string(episode.get('overview'))}")
                        # Bug fix: the original interpolated the scraped title
                        # and overview directly into the SQL string, which
                        # broke (and was injectable) for any value containing
                        # a single quote. Use bound parameters instead.
                        cursor.execute(
                            "UPDATE item SET title = ?, sort_title = ?, overview = ? WHERE guid = ?",
                            (name, convert_to_pinyin(name), overview, real_data[0]),
                        )
                        conn.commit()
                except Exception:
                    # Best effort per episode: report and keep processing.
                    print(f"❌ 处理单集信息失败,剧集信息: {str(episode)}")
    finally:
        # Close the connection even if scraping raised part-way through.
        conn.close()
    if nothing_updated:
        print(f"❌ 资源刮削失败,未更新任何资源!")
        sys.exit(1)
def getInfoByHtml(url):
    """Scrape a TMDB season page and return its episode metadata.

    Parameters:
        url: a themoviedb.org season URL ("/tv/<id>/season/<n>").

    Returns a list of dicts with keys 'episode_number', 'name' and
    'overview' (overview is '' when TMDB only shows its missing-synopsis
    placeholder). On any request or parse failure the whole process exits
    with status 1, mirroring the original fail-fast behaviour.
    """
    all_data = []
    try:
        # Bug fix: the original call had no timeout and could hang forever
        # on a stalled connection.
        response = requests.get(url, headers={'Accept-Language': 'zh-CN'}, timeout=15)
        # Surface HTTP errors (404/5xx) instead of silently parsing an
        # error page that happens to be text/html.
        response.raise_for_status()
        # .get() avoids a KeyError when the server omits Content-Type.
        if "text/html" in response.headers.get("Content-Type", ""):
            html_text = response.text.encode("utf-8")
            soup = BeautifulSoup(html_text, 'lxml')
            episode_list = soup.find("div", class_="episode_list").find_all("div", class_="card")
            for episode in episode_list:
                title = episode.find("div", class_="title")
                episode_number = title.find("span", class_="episode_number").text
                episode_title = title.find("div", class_="episode_title").find("h3").find("a").text
                overview = episode.find("div", class_="overview").find("p").text
                # TMDB's placeholder text for an episode with no synopsis.
                if overview == '暂无英文版的简介,请添加内容帮助我们完善数据库。':
                    overview = ''
                all_data.append({
                    "episode_number": episode_number,
                    "name": episode_title,
                    "overview": overview,
                })
    except Exception:
        print(f"❌ 获取剧季信息失败,请求地址: {url}")
        sys.exit(1)
    return all_data
# Build a standalone binary with: sudo pyinstaller --onefile --clean fnos_v.py
if __name__ == "__main__":
    doScrape()