当前位置：首页 > news >正文

数据集搜集器（百科）008

news 2026/5/11 21:11:50

对数据集搜集器（百科）007进行以下改进：
错误处理：增加更多的错误处理，比如网络请求超时、解析错误等。
用户界面：增加一些提示信息，让用户更清楚当前的操作状态。
多线程处理：确保多线程处理更加安全，避免多个线程同时操作同一资源。
日志记录：增加更多的日志记录，方便调试和追踪问题。
配置文件：引入配置文件，方便修改一些常量（如记录文件夹名称）。
完善后的代码
python

import json
import logging
import os
import queue
import threading
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter import ttk
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
# A named logger is used instead of the module-level ``logging.warning`` etc.
# helpers: those helpers implicitly call ``logging.basicConfig()`` when the
# root logger has no handler yet, which would turn the explicit
# ``basicConfig`` call further down into a no-op.
logger = logging.getLogger(__name__)

# Configuration file location and the values used when a key is missing.
CONFIG_FILE = 'config.json'
DEFAULT_CONFIG = {
    "record_folder": "记录",
    "log_file": "app.log",
}


def load_config(path=CONFIG_FILE):
    """Return the application configuration as a new dict.

    The JSON object stored at *path* is merged over a copy of
    ``DEFAULT_CONFIG``, so every default key is always present and the
    defaults themselves are never handed out for mutation.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        A dict containing the defaults overridden by the file's values.
        If the file does not exist, cannot be read, is not valid JSON, or
        does not contain a JSON object, the defaults are returned.
    """
    merged = dict(DEFAULT_CONFIG)
    try:
        with open(path, 'r', encoding='utf-8') as file:
            loaded = json.load(file)
    except FileNotFoundError:
        # No configuration file is the normal first-run situation.
        return merged
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        logger.warning("配置文件读取失败，使用默认配置: %s", exc)
        return merged
    if not isinstance(loaded, dict):
        logger.warning("配置文件格式错误，使用默认配置: %s", path)
        return merged
    merged.update(loaded)
    return merged


config = load_config()

# Configure logging only after the configuration is known so that the
# "log_file" setting is honoured.
logging.basicConfig(
    filename=config["log_file"],
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)


class BaikeSearchApp:
    """Tkinter window that queries Baidu Baike and records the answers.

    Network requests run in worker threads. Workers never touch a widget:
    they put ``(callback, args)`` pairs on ``self._ui_queue`` and the main
    thread runs them from ``_drain_ui_queue``, which re-schedules itself
    with ``root.after``.
    """

    REQUEST_TIMEOUT = 10  # seconds allowed for one HTTP request
    POLL_INTERVAL_MS = 100  # how often the main thread drains the UI queue

    def __init__(self, root):
        """Build the widgets inside *root* and start polling the UI queue."""
        self.root = root
        self.root.title("百度百科查询工具")

        # Question entry
        self.input_label = tk.Label(root, text="输入问题:")
        self.input_label.pack(pady=5)
        self.input_entry = tk.Entry(root, width=80)
        self.input_entry.pack(pady=5)

        # Output area
        self.text = tk.Text(root, wrap='word', height=20, width=80)
        self.text.pack(pady=10)

        # Action buttons
        self.load_button = tk.Button(root, text="加载文件", command=self.load_file)
        self.load_button.pack(side=tk.LEFT, padx=10)
        self.query_button = tk.Button(root, text="获取回答", command=self.get_answer)
        self.query_button.pack(side=tk.LEFT, padx=10)
        self.save_button = tk.Button(root, text="保存记录", command=self.save_record)
        self.save_button.pack(side=tk.LEFT, padx=10)
        self.history_button = tk.Button(root, text="查看历史记录", command=self.show_history)
        self.history_button.pack(side=tk.LEFT, padx=10)
        self.help_button = tk.Button(root, text="帮助", command=self.show_help)
        self.help_button.pack(side=tk.LEFT, padx=10)

        # Status bar
        self.status_var = tk.StringVar()
        self.status_bar = tk.Label(
            root, textvariable=self.status_var, bd=1, relief=tk.SUNKEN, anchor=tk.W
        )
        self.status_bar.pack(side=tk.BOTTOM, fill=tk.X)

        # Progress bar for batch queries
        self.progress = ttk.Progressbar(
            root, orient="horizontal", length=300, mode="determinate"
        )
        self.progress.pack(pady=10)

        # Answers collected so far; only modified on the main thread.
        self.history = []
        # Most recently started worker (kept as a public attribute).
        self.thread = None
        # Every worker that may still be running.
        self._workers = []
        # (callback, args) pairs handed from workers to the main thread.
        self._ui_queue = queue.Queue()

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
        self.root.after(self.POLL_INTERVAL_MS, self._drain_ui_queue)

    # ------------------------------------------------------------------
    # Worker / main-thread plumbing
    # ------------------------------------------------------------------
    def _start_worker(self, target, *args):
        """Start *target(*args)* in a new thread and remember the thread."""
        self._workers = [worker for worker in self._workers if worker.is_alive()]
        worker = threading.Thread(target=target, args=args)
        self._workers.append(worker)
        self.thread = worker
        worker.start()

    def _post_ui(self, callback, *args):
        """Queue *callback(*args)* to be run by the main thread."""
        self._ui_queue.put((callback, args))

    def _drain_ui_queue(self):
        """Run every queued UI callback, then schedule the next poll."""
        try:
            while True:
                try:
                    callback, args = self._ui_queue.get_nowait()
                except queue.Empty:
                    break
                callback(*args)
        finally:
            # Keep polling even if one callback raised.
            self.root.after(self.POLL_INTERVAL_MS, self._drain_ui_queue)

    def _set_progress(self, value):
        """Set the progress bar to *value* (main thread only)."""
        self.progress["value"] = value

    def _show_answer(self, answer):
        """Display *answer* and add it to the history (main thread only)."""
        query = answer["question"]
        content = answer["human_answers"][0]
        self.text.insert(tk.END, f"问题: {query}\n答案: {content}\n\n")
        self.history.append(answer)
        self.status_var.set(f"查询完成: {query}")

    def _show_failure(self, message, status):
        """Display a failure *message* and *status* (main thread only)."""
        self.text.insert(tk.END, message)
        self.status_var.set(status)

    # ------------------------------------------------------------------
    # Window events and button commands
    # ------------------------------------------------------------------
    def on_closing(self):
        """Close the window unless a worker thread is still running."""
        if any(worker.is_alive() for worker in self._workers):
            messagebox.showinfo("提示", "请等待所有任务完成后再关闭窗口。")
        else:
            self.root.destroy()

    def load_file(self):
        """Ask for a text file and query every non-blank line of it.

        The questions are echoed into the output area immediately; the
        lookups run one after another in a single worker thread, and the
        progress bar advances as each one finishes.
        """
        file_path = filedialog.askopenfilename(filetypes=[("Text files", "*.txt")])
        if not file_path:
            return
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                queries = [line.strip() for line in file if line.strip()]
        except (OSError, UnicodeDecodeError) as exc:
            logger.error("读取文件失败: %s", exc)
            messagebox.showerror("错误", f"读取文件失败: {exc}")
            self.status_var.set("读取文件失败")
            return
        if not queries:
            messagebox.showwarning("警告", "文件中没有可查询的问题")
            return

        self.progress["maximum"] = len(queries)
        self.progress["value"] = 0
        for query in queries:
            self.text.insert(tk.END, f"问题: {query}\n")
        self.status_var.set(f"正在批量查询: {file_path}")
        logger.info("开始批量查询: %s", file_path)
        self._start_worker(self._run_batch, queries, file_path)

    def _run_batch(self, queries, file_path):
        """Worker: look up each query in order and report progress."""
        for done, query in enumerate(queries, start=1):
            self._get_answer(query)
            self._post_ui(self._set_progress, done)
        self._post_ui(self.status_var.set, f"已加载文件: {file_path}")

    def get_answer(self, query=None):
        """Look up *query* in a background thread.

        When *query* is empty the entry box is used, then the line of the
        output area that holds the insertion cursor. If all of them are
        empty a warning dialog is shown and nothing is started.
        """
        if not query:
            query = self.input_entry.get().strip()
        if not query:
            query = self.text.get("insert linestart", "insert lineend").strip()
        if not query:
            messagebox.showwarning("警告", "请先输入或选择一个问题")
            return
        self.status_var.set(f"正在查询: {query}")
        logger.info("开始查询: %s", query)
        self._start_worker(self._get_answer, query)

    def _fetch_description(self, query):
        """Return the meta description of the Baike page for *query*.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-success HTTP status.
        """
        # Percent-encode the whole term so "/", "?" and "#" stay part of it.
        encoded = quote(query, safe="")
        url = f"https://baike.baidu.com/item/{encoded}"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # The summary is read from <meta name="description" content="...">.
        description_tag = soup.find('meta', attrs={'name': 'description'})
        if description_tag is not None and description_tag.get('content'):
            return description_tag['content']
        return "未找到相关词条"

    def _get_answer(self, query):
        """Worker: fetch the answer for *query* and queue its display."""
        try:
            content = self._fetch_description(query)
        except requests.RequestException as exc:
            logger.error("请求失败: %s", exc)
            self._post_ui(self._show_failure, f"请求失败: {exc}\n", "请求失败")
            return
        except Exception as exc:
            # Thread boundary: report unexpected parsing errors instead of
            # letting the worker die silently.
            logger.exception("解析失败: %s", query)
            self._post_ui(self._show_failure, f"解析失败: {exc}\n", "解析失败")
            return
        answer = {
            "question": query,
            "human_answers": [content],
            "chatgpt_answers": [content],
        }
        logger.info("查询完成: %s", query)
        self._post_ui(self._show_answer, answer)

    def save_record(self):
        """Write the history, one JSON object per line, to the record folder."""
        record_folder = config["record_folder"]
        record_path = os.path.join(record_folder, "bata.txt")
        try:
            os.makedirs(record_folder, exist_ok=True)
            with open(record_path, 'w', encoding='utf-8') as file:
                file.writelines(
                    json.dumps(record, ensure_ascii=False) + "\n"
                    for record in self.history
                )
        except OSError as exc:
            logger.error("保存记录失败: %s", exc)
            messagebox.showerror("错误", f"保存记录失败: {exc}")
            self.status_var.set("保存记录失败")
            return
        logger.info("记录已保存: %s", record_path)
        self.status_var.set("记录已保存")

    def show_history(self):
        """Open a window listing the history with a button to clear it."""
        history_window = tk.Toplevel(self.root)
        history_window.title("历史记录")
        history_text = tk.Text(history_window, wrap='word', height=20, width=80)
        history_text.pack(pady=10)
        for record in self.history:
            history_text.insert(tk.END, json.dumps(record, ensure_ascii=False) + "\n")

        def clear_and_refresh():
            # Also empty this window so it does not keep showing stale records.
            self.clear_history()
            history_text.delete("1.0", tk.END)

        clear_button = tk.Button(
            history_window, text="清空历史记录", command=clear_and_refresh
        )
        clear_button.pack(pady=10)

    def clear_history(self):
        """Forget all recorded answers and empty the output area."""
        self.history = []
        self.text.delete("1.0", tk.END)
        self.status_var.set("历史记录已清空")

    def show_help(self):
        """Open a window with the usage instructions."""
        help_window = tk.Toplevel(self.root)
        help_window.title("帮助文档")
        help_text = tk.Text(help_window, wrap='word', height=20, width=80)
        help_text.pack(pady=10)
        help_content = (
            "使用说明:\n"
            "1. 在输入框中输入问题，点击“获取回答”按钮查询答案。\n"
            "2. 点击“加载文件”按钮，选择包含问题的文本文件，批量查询答案。\n"
            "3. 查询结果会显示在文本框中，并自动保存到历史记录。\n"
            "4. 点击“保存记录”按钮，将历史记录保存到文件中。\n"
            "5. 点击“查看历史记录”按钮，查看和管理历史记录。\n"
            "6. 点击“帮助”按钮，查看使用说明。\n"
        )
        help_text.insert(tk.END, help_content)


if __name__ == "__main__":
    root = tk.Tk()
    app = BaikeSearchApp(root)
    root.mainloop()

主要改进点

配置文件：引入了 config.json 文件来存储一些常量，如记录文件夹名称。
错误处理：增加了网络请求的超时处理。
日志记录：增加了更多的日志记录，方便调试和追踪问题。
用户界面：增加了更多的状态提示，让用户更清楚当前的操作状态。

数据集搜集器（百科）008

主要改进点

相关文章：

数据集搜集器（百科）008

Java学习，反射

数据结构（18）数的定义与基本术语

Flink的双流join理解

《使用Python进行数据挖掘：理论、应用与案例研究》

Go语言技巧：快速统一字符串中的换行符，解决跨平台问题

算法训练营day20(二叉树06:最大二叉树,合并二叉树,搜索二叉树,验证搜索二叉树)

Leetcode（区间合并习题思路总结，持续更新。。。）

『python爬虫』使用docling 将pdf或html网页转为MD （保姆级图文）

elasticsearch现有集群扩展节点

力扣162：寻找峰值

Kafka-Connect

递归、搜索与回溯算法 - 3 （ floodfill 记忆化搜素 9000 字详解）

YOLOv9改进，YOLOv9引入CAS-ViT（卷积加自注意力视觉变压器）中AdditiveBlock模块，二次创新RepNCSPELAN4结构

HDLCPPP原理与配置

react + vite 中的环境变量怎么获取

知识蒸馏中有哪些经验| 目标检测 |mobile-yolov5-pruning-distillation项目中剪枝知识分析

Oracle 19c RAC单节点停机维护硬件

Linux系统进程

机载视频流回传+编解码方案

别再傻等进位了！手把手教你用Verilog实现4位超前进位加法器（附完整代码）

Spring AI 2.0 开发Java Agent智能体 - 会话记忆(Chat Memory)

从零搭建生产级LLM API服务：架构设计、部署与性能调优实战

航模电调XXD2212的“坑”与“宝”：从欠压报警到堵转丢步的实战避坑指南

从医学到金融：用Python实战Cox比例风险模型进行企业风险预测（附完整代码）

CANN/asc-devkit FreeAllEvent API文档

Topit：突破macOS窗口层级限制，打造极致高效的多任务工作流

FanControl终极指南：Windows风扇智能控制完全手册

如何快速导出API账单数据？New API 数据导出功能完整指南

前端状态管理：主流状态管理库对比与选型指南