hace 6 meses · d4d0539580
--- a/src/web_crawler/crawle_chinese.js
+++ b/src/web_crawler/crawle_chinese.js
@@ -12,10 +12,53 @@ import iconv from 'iconv-lite';
 
				 const __filename = fileURLToPath(import.meta.url);
			
 
				 const __dirname = path.dirname(__filename);
			
 
				 
			
 
				-const WEB_URL="https://www.piaotia.com/html/3/3759/";
			
 
				-const title = "奥术神座";
			
 
				-const author = "爱潜水的乌贼";
			
 
				-const coverName = "cover.jpeg";
			
 
				+const WEB_URL="https://www.piaotia.com/html/5/5150/";
			
 
				+const title = "回到过去变成猫";
			
 
				+const author = "陈词懒调";
			
 
				+const coverName = "cover.png";
			
 
				+
			
 
				/**
				 * Check whether a usable chapter file is already present on disk.
				 *
				 * The file name is built as `<index zero-padded to 4 digits>_<title>.html`,
				 * with every character of the title that is in the set \ / : * ? " < > |
				 * replaced by "_".
				 *
				 * @param {string} contentDir - Directory that holds the chapter HTML files.
				 * @param {object} chapter - Chapter descriptor; `index` and `title` are read.
				 * @returns {boolean} `true` only when the path is a non-empty regular file;
				 *   `false` otherwise, including when the check itself throws.
				 */
				function isChapterFileExists(contentDir, chapter) {
				    try {
				        const safeTitle = chapter.title.replace(/[\\/:*?"<>|]/g, '_');
				        const chapterFileName = `${String(chapter.index).padStart(4, '0')}_${safeTitle}.html`;
				        const chapterFilePath = path.join(contentDir, chapterFileName);

				        // A bare existence check would also accept a directory or a zero-byte
				        // file; neither can provide chapter content, and reporting them as
				        // "existing" would make the caller skip the chapter without content.
				        const stats = fs.statSync(chapterFilePath, { throwIfNoEntry: false });
				        return stats !== undefined && stats.isFile() && stats.size > 0;
				    } catch (error) {
				        console.error(`检查章节文件是否存在时出错: ${error.message}`);
				        return false;
				    }
				}
			
 
				+
			
 
				/**
				 * Load the body text of a chapter from its previously saved HTML file.
				 *
				 * The file name is built as `<index zero-padded to 4 digits>_<title>.html`,
				 * with every character of the title that is in the set \ / : * ? " < > |
				 * replaced by "_". The returned text is whatever sits between the first
				 * `</h1>` that follows the opening `<body>` tag and the closing `</body>`
				 * tag, without the surrounding whitespace.
				 *
				 * @param {string} contentDir - Directory that holds the chapter HTML files.
				 * @param {object} chapter - Chapter descriptor; `index` and `title` are read.
				 * @returns {string|null} The extracted chapter markup, or `null` when the
				 *   file is missing, does not have the expected layout, or cannot be read.
				 */
				function loadExistingChapterContent(contentDir, chapter) {
				    try {
				        const safeTitle = chapter.title.replace(/[\\/:*?"<>|]/g, '_');
				        const chapterFileName = `${String(chapter.index).padStart(4, '0')}_${safeTitle}.html`;
				        const chapterFilePath = path.join(contentDir, chapterFileName);

				        // Read directly instead of checking for existence first: this avoids
				        // a second filesystem call and the window in which the file could
				        // disappear between the check and the read.
				        const htmlContent = fs.readFileSync(chapterFilePath, 'utf-8');

				        // Accept attributes on <body>, any tag letter case and a heading that
				        // spans several lines; the capture group is the text after the heading.
				        const contentMatch = htmlContent.match(
				            /<body(?:\s[^>]*)?>\s*<h1[^>]*>[\s\S]*?<\/h1>\s*([\s\S]*?)\s*<\/body>/i
				        );
				        return contentMatch ? contentMatch[1] : null;
				    } catch (error) {
				        // A missing file is an expected outcome, not an error worth logging.
				        if (error.code === 'ENOENT') {
				            return null;
				        }
				        console.error(`加载已存在章节内容时出错: ${error.message}`);
				        return null;
				    }
				}
			
 
				 
			
 
				 /**
			
 
				  * 爬取单个章节内容
			
@@ -393,6 +436,19 @@ async function crawleWeb(outputDir = 'src/web_crawler', fetchContent = false, ma
 
				                     await setTimeout(3000);
			
 
				                 }
			
 
				                 
			
 
				+                // 检查章节文件是否已存在
			
 
				+                if (isChapterFileExists(contentDir, chapter)) {
			
 
				+                    console.log(`章节 ${chapter.title} 已存在，跳过爬取`);
			
 
				+                    // 加载已存在的章节内容
			
 
				+                    const existingContent = loadExistingChapterContent(contentDir, chapter);
			
 
				+                    if (existingContent) {
			
 
				+                        chapters[i].content = existingContent;
			
 
				+                        contentsObj[chapter.title] = existingContent;
			
 
				+                        console.log(`已加载章节 ${chapter.title} 的现有内容`);
			
 
				+                    }
			
 
				+                    continue;
			
 
				+                }
			
 
				+                
			
 
				                 // 检查章节URL是否有效
			
 
				                 if (!chapter.url || !chapter.url.startsWith('http')) {
			
 
				                     console.error(`章节 ${chapter.title} 的URL无效: ${chapter.url}`);
			
@@ -500,6 +556,19 @@ async function crawleWeb(outputDir = 'src/web_crawler', fetchContent = false, ma
 
				                         const { index, chapter } = failedItem;
			
 
				                         console.log(`重试章节: ${chapter.title}`);
			
 
				                         
			
 
				+                        // 检查章节文件是否已存在
			
 
				+                        if (isChapterFileExists(contentDir, chapter)) {
			
 
				+                            console.log(`章节 ${chapter.title} 已存在，跳过重试`);
			
 
				+                            // 加载已存在的章节内容
			
 
				+                            const existingContent = loadExistingChapterContent(contentDir, chapter);
			
 
				+                            if (existingContent) {
			
 
				+                                chapters[index].content = existingContent;
			
 
				+                                contentsObj[chapter.title] = existingContent;
			
 
				+                                console.log(`已加载章节 ${chapter.title} 的现有内容`);
			
 
				+                            }
			
 
				+                            continue;
			
 
				+                        }
			
 
				+                        
			
 
				                         // 检查章节URL是否有效
			
 
				                         if (!chapter.url || !chapter.url.startsWith('http')) {
			
 
				                             console.error(`重试章节 ${chapter.title} 的URL无效: ${chapter.url}`);