Files
growth_report/utils/generate_utils.py
2025-12-12 12:37:41 +08:00

424 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import time
import pythoncom
import pandas as pd
from loguru import logger
from pptx import Presentation
from rich.console import Console
import traceback
import comtypes.client
from config.config import load_config
from utils.agent_utils import generate_comment
from utils.font_utils import install_fonts_from_directory
from utils.image_utils import find_image_path
from utils.zodiac_utils import calculate_zodiac
from utils.growt_utils import (
replace_one_page,
replace_two_page,
replace_three_page,
replace_four_page,
replace_five_page,
)
# Module-level Rich console, defined here in case no global console was
# defined earlier. NOTE(review): not referenced by the functions visible in
# this file — confirm whether other modules import it.
console = Console()
# ==========================================
# Configuration
# ==========================================
# Settings loaded once at import time from "config.toml"; the functions below
# read their file paths and options from this mapping.
config = load_config("config.toml")
# ==========================================
# 1. Generate templates (create one image folder per student in names.xlsx)
# ==========================================
def generate_template():
    """Create an image folder for every student named in the Excel sheet.

    Reads the "姓名" column of sheet "Sheet1" from ``config["excel_file"]``
    and makes sure ``config["image_folder"]/<name>`` exists for each student.
    Rows whose name cell is empty are skipped. Errors are logged (with a
    traceback) instead of being raised.
    """
    try:
        df = pd.read_excel(config["excel_file"], sheet_name="Sheet1")
        # Selecting the column by its bare label yields a flat list of names
        # rather than a list of one-element rows.
        datas = df["姓名"].values.tolist()
        logger.info(f"开始生成学生模版文件,共 {len(datas)} 位学生...")
        for i, raw_name in enumerate(datas):
            # Blank cells come back as NaN; str(NaN) would otherwise create a
            # folder literally named "nan".
            name = "" if pd.isna(raw_name) else str(raw_name).strip()
            if not name:
                logger.warning(f"[{i + 1}/{len(datas)}] 姓名为空,跳过。")
                continue
            logger.info(f"[{i + 1}/{len(datas)}] 正在生成: {name}")
            student_folder = os.path.join(config["image_folder"], name)
            if os.path.exists(student_folder):
                logger.info(f"学生图片文件夹已存在 {student_folder}")
            else:
                logger.info(f"正在生成学生图片文件夹 {student_folder}")
                os.makedirs(student_folder, exist_ok=True)
    except Exception as e:
        logger.error(f"程序运行出错: {str(e)}")
        # Log the full traceback so the failing line can be located.
        logger.error(traceback.format_exc())
# ==========================================
# 2. Generate comments (fill the "评价" column of names.xlsx)
# ==========================================
def generate_comment_all():
    """Generate a teacher comment for every student and write it back to Excel.

    Reads sheet "Sheet1" of ``config["excel_file"]``, calls
    ``generate_comment(name, config["age_group"], traits, sex)`` for each row
    whose "评价" cell is still empty, and stores the result in that cell.
    The workbook is saved after every fifth row position that produced a
    comment and once more at the end. Rows with an empty name are skipped.
    Errors are logged instead of being raised.
    """
    try:
        excel_path = config["excel_file"]
        df = pd.read_excel(excel_path, sheet_name="Sheet1")
        # Create the target column when the sheet does not have it yet.
        if "评价" not in df.columns:
            df["评价"] = ""
        total_count = len(df)
        logger.info(f"开始生成学生评语,共 {total_count} 位学生...")
        # Force object dtype so string comments can be assigned cell by cell.
        df["评价"] = df["评价"].astype("object")
        # Both columns are optional; a missing one falls back to a default.
        has_sex = "性别" in df.columns
        has_traits = "表现特征" in df.columns
        # Iterate over index labels so results can be written back to the exact
        # row; ``pos`` is the 1-based row position used for progress output.
        for pos, i in enumerate(df.index, start=1):
            name = df.at[i, "姓名"]
            if pd.isna(name):
                continue  # skip blank rows
            name = str(name).strip()

            sex = df.at[i, "性别"] if has_sex else None
            sex = "" if pd.isna(sex) else str(sex).strip()

            traits = df.at[i, "表现特征"] if has_traits else None
            if pd.isna(traits) or not str(traits).strip():
                traits = "有礼貌、守纪律"
            else:
                traits = str(traits).strip()

            # Skip rows that already have a comment to avoid paying for the
            # API call again.
            current_comment = df.at[i, "评价"]
            if not pd.isna(current_comment) and str(current_comment).strip() != "":
                logger.info(f"[{pos}/{total_count}] {name} 已有评语,跳过。")
                continue

            logger.info(f"[{pos}/{total_count}] 正在生成评价: {name}")
            try:
                generated_text = generate_comment(
                    name, config["age_group"], traits, sex
                )
            except Exception as e:
                logger.error(f"学生:{name},生成评语出错: {str(e)}")
                continue

            comment = str(generated_text).strip() if generated_text else ""
            df.at[i, "评价"] = comment
            if comment:
                logger.success(f"学生:{name},评语生成完毕")
            else:
                logger.warning(f"学生:{name},评语生成结果为空")

            # Checkpoint save. Kept outside the generation ``try`` so that a
            # locked workbook is not misreported as a generation failure; the
            # final save below still reports a hard failure.
            if pos % 5 == 0:
                try:
                    df.to_excel(excel_path, index=False)
                    logger.success("✅ 阶段性保存成功")
                except Exception as e:
                    logger.warning(f"阶段性保存失败: {str(e)}")

            # Pause between requests to stay under the API rate limit.
            time.sleep(1)

        # Final save; index=False keeps the pandas index out of the sheet.
        df.to_excel(excel_path, index=False)
        logger.success(f"所有评语已生成并写入文件:{excel_path}")
    except PermissionError:
        logger.error(f"保存失败!请先关闭 Excel 文件:{config['excel_file']}")
    except Exception as e:
        logger.error(f"程序运行出错: {str(e)}")
        logger.error(traceback.format_exc())
# ==========================================
# 3. Generate growth reports (one presentation per student in names.xlsx)
# ==========================================
def _format_birthday(birthday):
    """Return *birthday* as ``YYYY-MM-DD`` text, or ``""`` for an empty cell."""
    if pd.isna(birthday):
        return ""
    if hasattr(birthday, "strftime"):
        return birthday.strftime("%Y-%m-%d")
    # Birthdays typed as text in Excel arrive as plain strings.
    return str(birthday).strip()


def _existing_file(path):
    """Return *path* when it is a non-empty string naming an existing path, else ``None``."""
    if path and isinstance(path, str) and os.path.exists(path):
        return path
    return None


def _build_student_report(name, fields, teacher_names_str, class_image_path):
    """Fill a fresh copy of the template for one student and save it.

    *name* is the already-stripped student name, *fields* holds the remaining
    column values in sheet order, and *class_image_path* is the result of the
    class image lookup (may be ``None`` or point to a missing file).
    """
    (
        english_name,
        sex,
        birthday,
        zodiac,
        friend,
        hobby,
        game,
        food,
        comments,
    ) = fields
    # Reload the template for every student so edits never leak between reports.
    prs = Presentation(config["source_file"])
    # --- Page 1 ---
    replace_one_page(prs, name, config["class_name"])
    # --- Page 2 ---
    replace_two_page(prs, comments, teacher_names_str)
    # --- Page 3 ---
    student_image_folder = os.path.join(config["image_folder"], name)
    logger.info(f"学生:{name},图片文件夹: {student_image_folder}")
    has_student_folder = os.path.exists(student_image_folder)
    if has_student_folder:
        me_image_path = find_image_path(student_image_folder, "me")
        info_dict = {
            "name": name,
            "english_name": english_name,
            "sex": sex,
            "birthday": _format_birthday(birthday),
            "zodiac": zodiac,
            "friend": friend,
            "hobby": hobby,
            "game": game,
            "food": food,
        }
        # Pass None when the portrait is missing; the page is still filled.
        replace_three_page(prs, info_dict, _existing_file(me_image_path))
    else:
        logger.warning(
            f"⚠️ 警告: 学生:{name},学生图片文件夹不存在 {student_image_folder}"
        )
    # --- Page 4 ---
    if _existing_file(class_image_path):
        replace_four_page(prs, class_image_path)
    else:
        logger.warning(f"⚠️ 警告: 班级图片文件不存在 {class_image_path}")
    # --- Page 5 ---
    if has_student_folder:
        img1_path = find_image_path(student_image_folder, "1")
        img2_path = find_image_path(student_image_folder, "2")
        if img1_path and img2_path:
            # Both pictures found: insert them as they are.
            replace_five_page(prs, img1_path, img2_path)
        elif img1_path:
            # Only picture 1 found: use it for both slots.
            replace_five_page(prs, img1_path, img1_path)
        else:
            # Picture 1 is missing (with or without picture 2).
            logger.warning(
                f"⚠️ 警告: {name} 缺少作品照片 (1.jpg/png 或 2.jpg/png)"
            )
    else:
        logger.warning(f"错误: 学生图片文件夹不存在 {student_image_folder}")
    # --- Save ---
    file_ext = os.path.splitext(config["source_file"])[1]
    new_filename = f"{config['class_name']} {name} 幼儿成长报告{file_ext}"
    output_path = os.path.join(config["output_folder"], new_filename)
    try:
        prs.save(output_path)
        logger.success(f"学生:{name},保存成功: {new_filename}")
    except PermissionError:
        logger.error(
            f"保存失败: 文件 {new_filename} 可能已被打开,请关闭后重试。"
        )


def generate_report():
    """Generate one growth-report presentation per student.

    Installs the fonts from ``config["fonts_dir"]``, reads sheet "Sheet1" of
    ``config["excel_file"]`` and, for every row with a name, fills a copy of
    ``config["source_file"]`` and saves it into ``config["output_folder"]``.
    A failure for one student is logged and the remaining students are still
    processed. Nothing is raised to the caller.
    """
    # Give the system a moment to pick up freshly installed fonts.
    if install_fonts_from_directory(config["fonts_dir"]):
        logger.info("等待系统识别新安装的字体...")
        time.sleep(2)
    os.makedirs(config["output_folder"], exist_ok=True)
    if not os.path.exists(config["source_file"]):
        logger.error(f"错误: 找不到模版文件 {config['source_file']}")
        return
    if not os.path.exists(config["excel_file"]):
        logger.error(f"错误: 找不到数据文件 {config['excel_file']}")
        return
    try:
        df = pd.read_excel(config["excel_file"], sheet_name="Sheet1")
        # Column order must match the unpacking in _build_student_report.
        columns = [
            "姓名",
            "英文名",
            "性别",
            "生日",
            "属相",
            "我的好朋友",
            "我的爱好",
            "喜欢的游戏",
            "喜欢吃的食物",
            "评价",
        ]
        missing = [col for col in columns if col not in df.columns]
        if missing:
            logger.error(f"Excel 缺少必要的列: {missing}")
            return
        datas = df[columns].values.tolist()
        teacher_names_str = " ".join(config["teachers"])
        # The class picture is the same for every student: look it up once.
        class_image_path = find_image_path(
            config["image_folder"], config["class_name"]
        )
        logger.info(f"开始处理,共 {len(datas)} 位学生...")
        for i, row_data in enumerate(datas):
            raw_name = row_data[0]
            # Strip the name so it matches the folder created by
            # generate_template; blank rows are skipped.
            name = "" if pd.isna(raw_name) else str(raw_name).strip()
            if not name:
                logger.warning(f"[{i + 1}/{len(datas)}] 姓名为空,跳过。")
                continue
            logger.info(f"[{i + 1}/{len(datas)}] 正在生成: {name}")
            try:
                _build_student_report(
                    name, row_data[1:], teacher_names_str, class_image_path
                )
            except Exception as e:
                # Keep going: one broken row must not cost the whole batch.
                logger.error(f"学生:{name},生成报告出错: {str(e)}")
                logger.error(traceback.format_exc())
        logger.success("所有报告生成完毕!")
    except Exception as e:
        logger.error(f"程序运行出错: {str(e)}")
        logger.error(traceback.format_exc())
# ==========================================
# 4. Convert format (export every PPT in a folder to PDF)
# ==========================================
def batch_convert_folder(folder_path):
    """Convert every ``.ppt``/``.pptx`` file in *folder_path* to PDF.

    PowerPoint is started once through COM and reused for all files. A file
    whose PDF already exists next to it is skipped. COM is initialised for
    the calling thread on entry and released on exit. All errors are logged;
    nothing is raised to the caller.
    """
    # COM must be initialised on the thread that uses it.
    pythoncom.CoInitialize()
    try:
        folder_path = os.path.abspath(folder_path)
        if not os.path.exists(folder_path):
            logger.error(f"文件夹不存在: {folder_path}")
            return
        # "~$name.pptx" entries are lock files Office leaves beside an open
        # document, not real presentations, so they are excluded.
        files = sorted(
            f
            for f in os.listdir(folder_path)
            if f.lower().endswith((".ppt", ".pptx")) and not f.startswith("~$")
        )
        if not files:
            logger.warning("没有找到 PPT 文件")
            return
        logger.info(f"发现 {len(files)} 个文件,准备开始转换...")
        powerpoint = None
        try:
            # Start the application once for the whole batch.
            powerpoint = comtypes.client.CreateObject("PowerPoint.Application")
            # If conversions hang, making the window visible may help:
            # powerpoint.Visible = 1
            for filename in files:
                ppt_path = os.path.join(folder_path, filename)
                pdf_path = os.path.splitext(ppt_path)[0] + ".pdf"
                if os.path.exists(pdf_path):
                    logger.info(f"[跳过] 已存在: {filename}")
                    continue
                logger.info(f"正在转换: {filename} ...")
                deck = None
                try:
                    deck = powerpoint.Presentations.Open(ppt_path)
                    deck.SaveAs(pdf_path, 32)  # 32 selects the PDF format
                except Exception as e:
                    logger.error(f"文件 {filename} 转换出错: {e}")
                finally:
                    # Always close the deck, even when SaveAs failed, so it
                    # does not stay open inside PowerPoint.
                    if deck is not None:
                        try:
                            deck.Close()
                        except Exception as e:
                            logger.warning(f"文件 {filename} 关闭出错: {e}")
        except Exception as e:
            logger.error(f"PowerPoint 进程启动出错: {e}")
        finally:
            if powerpoint is not None:
                try:
                    powerpoint.Quit()
                except Exception as e:
                    # Best effort: report the failure instead of hiding it.
                    logger.warning(f"PowerPoint 退出出错: {e}")
            logger.success("PowerPoint 已关闭,批量转换完成。")
    except Exception as e:
        logger.error(f"未知错误: {e}")
    finally:
        # Balance the CoInitialize call above.
        pythoncom.CoUninitialize()
# ==========================================
# 5. Generate zodiac signs (fill the "属相" column of names.xlsx)
# ==========================================
def generate_zodiac():
    """Compute each student's zodiac from the birthday column and save it.

    Reads sheet "Sheet1" of ``config["excel_file"]``, parses the "生日" column
    as dates (unparseable values become NaT), applies ``calculate_zodiac`` to
    every parsed value and stores the result in the "属相" column, creating or
    overwriting it. The workbook is then written back to the same path.
    Errors are logged instead of being raised.
    """
    try:
        excel_path = config["excel_file"]
        # Change sheet_name if the data lives elsewhere (0 = first sheet).
        df = pd.read_excel(excel_path, sheet_name="Sheet1")
        date_column = "生日"
        target_column = "属相"
        if date_column not in df.columns:
            logger.error(f"Excel中找不到列名【{date_column}】,请检查表头。")
            return
        total_count = len(df)
        logger.info(f"开始生成学生属相,共 {total_count} 位学生...")
        # Parse into a temporary series so the original birthday cells are
        # left as they were read.
        temp_dates = pd.to_datetime(df[date_column], errors="coerce")
        # The assignment creates the target column when it is missing, so no
        # separate pre-creation step is needed.
        df[target_column] = temp_dates.apply(calculate_zodiac)
        save_path = excel_path
        try:
            df.to_excel(save_path, index=False)
            logger.success(f"所有属相已更新并写入文件:{save_path}")
            logger.warning(f"请检查文件 {save_path} 修改日期格式。")
        except PermissionError:
            logger.error(f"保存失败!请先关闭 Excel 文件:{save_path}")
    except FileNotFoundError:
        logger.error(f"找不到文件 {config.get('excel_file')}")
    except Exception as e:
        logger.error(f"程序运行出错: {str(e)}")
        logger.error(traceback.format_exc())