import moment from 'moment';
import fs from 'fs';
import { promises as fsPromises } from 'fs';
import commonModel from '../../model/commonModel.js';
import config from '../../config/index.js';
import _ from 'lodash';
import axios from 'axios';
import { stringUtils } from '../../util/stringClass.js';
import { globalCache } from '../../util/GlobalCache.js';
import machineTranslationAPI from './machineTranslation.js';
import path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Directory that holds one sub-directory per book.
const CRAWLER_ROOT = path.join(__dirname, '../../web_crawler');

// Titles returned by GetReaderBooks, in display order.
const READER_BOOK_TITLES = [
  'Strange-Life-of-a-Cat',
  'Throne-of-Magical-Arcana',
  'Zhui-Xu',
  'Hidden-Assassin',
  'Release-that-Witch',
  'The-Legendary-Mechanic',
];

// NOTE(review): the copy of this file under review had its inline HTML markers
// stripped (string literals contained raw line breaks and one regex was empty).
// `<h2>`, `<p>`, `<br>` and `&nbsp;` below were reconstructed from the surviving
// `</h2>` / `</p>` patterns - confirm against the crawler's actual output.
const CHAPTER_HEADING_PREFIX = '<h2>Chapter';

/**
 * Resolve the directory of a book, refusing anything that is not a plain
 * directory name. The title comes straight from the query string, so it must
 * not be able to escape CRAWLER_ROOT via separators or `..`.
 *
 * @param {string} title - Book title taken from the request.
 * @returns {string} Absolute path of the book directory.
 * @throws {Error} When the title is empty, not a string, or contains path syntax.
 */
function resolveBookDir(title) {
  const isPlainName =
    typeof title === 'string' &&
    title !== '' &&
    title !== '.' &&
    title !== '..' &&
    !/[\\/\0]/.test(title);
  if (!isPlainName) {
    const err = new Error(`Invalid book title: ${JSON.stringify(title)}`);
    // NOTE(review): `status` is what Koa-style error middleware reads; confirm
    // against this app's error handler.
    err.status = 400;
    throw err;
  }
  return path.join(CRAWLER_ROOT, title);
}

/**
 * Numeric sort key taken from the part of a file name before the first `_`.
 * Names without a numeric prefix sort last instead of yielding NaN, which
 * would make the comparator inconsistent.
 *
 * @param {string} fileName
 * @returns {number}
 */
function chapterOrder(fileName) {
  const index = Number.parseInt(fileName.split('_')[0], 10);
  return Number.isNaN(index) ? Number.MAX_SAFE_INTEGER : index;
}

/**
 * Extract the chapter title from a chapter file name; falls back to the whole
 * file name when the pattern does not match.
 *
 * @param {string} fileName
 * @returns {string}
 */
function chapterTitleFromFileName(fileName) {
  const match = fileName.match(/_([^\.]+)\.html$/);
  return match ? match[1] : fileName;
}

/**
 * Convert one line of chapter HTML into plain text: line-break and block tags
 * become newlines, `&nbsp;` becomes a space, every remaining tag is dropped,
 * and a space is inserted after `.`, `!` or `?` when one is missing.
 *
 * @param {string} line
 * @returns {string}
 */
function htmlLineToText(line) {
  const text = line
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/&nbsp;/g, ' ')
    .replace(/<h2>/g, '\n\n')
    .replace(/<\/h2>/g, '\n\n')
    .replace(/<p>/g, '\n')
    .replace(/<\/p>/g, '\n')
    .replace(/<[^>]+>/g, '')
    .trim();
  return text.replace(/([.!?])(?=\S)/g, '$1 ');
}

/**
 * Split one line of chapter HTML into display fragments. The text is cut after
 * sentence-ending punctuation, and every straight double quote becomes its own
 * `"` element. Text flushed in front of a quote is trimmed; the text that ends
 * a sentence part keeps a single trailing space.
 *
 * @param {string} line
 * @returns {string[]} Fragments in reading order; empty when the line has no text.
 */
function splitLineIntoSentences(line) {
  const text = htmlLineToText(line);
  const fragments = [];
  if (!text) {
    return fragments;
  }

  for (const part of text.split(/(?<=[.!?])\s+/)) {
    if (!part.trim()) {
      continue;
    }
    const segments = part.split('"');
    const tail = segments.pop();
    // Every segment except the last one was followed by a quote character.
    for (const segment of segments) {
      const beforeQuote = segment.trim();
      if (beforeQuote) {
        fragments.push(beforeQuote);
      }
      fragments.push('"');
    }
    const rest = tail.trim();
    if (rest) {
      fragments.push(`${rest} `);
    }
  }
  return fragments;
}

/**
 * List the books available to the reader.
 *
 * @param {object} ctx - Request context; the response is written to `ctx.body`.
 * @returns {Promise<void>}
 */
export async function GetReaderBooks(ctx) {
  const result = READER_BOOK_TITLES.map((Title) => ({ Title }));
  ctx.body = { "errcode": 10000, result };
}

/**
 * List the chapter titles of one book, ordered by the numeric prefix of the
 * chapter file names found in `<Title>/<Title>_contents`.
 *
 * @param {object} ctx - Request context; reads `ctx.query.Title`, writes `ctx.body`.
 * @returns {Promise<void>}
 * @throws {Error} When the title is invalid or the directory cannot be read.
 */
export async function GetReaderBooksChapter(ctx) {
  const title = ctx.query.Title || "";
  const contentsDir = path.join(resolveBookDir(title), `${title}_contents`);

  const fileNames = await fsPromises.readdir(contentsDir);
  const result = fileNames
    .filter((fileName) => fileName.endsWith('.html'))
    .sort((a, b) => chapterOrder(a) - chapterOrder(b))
    .map(chapterTitleFromFileName);

  ctx.body = { "errcode": 10000, result: result };
}

/**
 * Return the text of one chapter as an array of paragraphs, each paragraph
 * being an array of sentence fragments (see splitLineIntoSentences).
 *
 * The book is read from `<Title>/<Title>.html`. The chapter starts after the
 * line that is exactly the chapter heading and ends before the next line that
 * contains a chapter heading. An unknown chapter yields an empty result.
 *
 * @param {object} ctx - Request context; reads `ctx.query.Title` and
 *   `ctx.query.Chapter` (defaults to "0"), writes `ctx.body`.
 * @returns {Promise<void>}
 * @throws {Error} When the title is invalid or the book file cannot be read.
 */
export async function GetReaderBooksChapterContent(ctx) {
  const title = ctx.query.Title || "";
  const chapter = ctx.query.Chapter || "0";

  const filePath = path.join(resolveBookDir(title), `${title}.html`);
  const content = await fsPromises.readFile(filePath, 'utf-8');
  console.log(`已读取文件: ${filePath}`);

  const heading = `${CHAPTER_HEADING_PREFIX} ${chapter}</h2>`;
  const result = [];
  let inChapter = false;

  // Accept CRLF files too: a trailing "\r" would defeat the exact heading match.
  for (const line of content.split(/\r?\n/)) {
    if (!inChapter) {
      // The heading line only marks the start; it is not part of the result.
      inChapter = line === heading;
      continue;
    }
    if (line.includes(CHAPTER_HEADING_PREFIX)) {
      break; // Reached the next chapter heading.
    }
    const sentences = splitLineIntoSentences(line);
    // Only lines that produced at least one fragment become a paragraph.
    if (sentences.length > 0) {
      result.push(sentences);
    }
  }

  ctx.body = { "errcode": 10000, result: result };
}