| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- import moment from 'moment';
- import fs from 'fs';
- import { promises as fsPromises } from 'fs';
- import commonModel from '../../model/commonModel.js';
- import config from '../../config/index.js';
- import _ from 'lodash';
- import axios from 'axios';
- import { stringUtils } from '../../util/stringClass.js';
- import { globalCache } from '../../util/GlobalCache.js';
- import machineTranslationAPI from './machineTranslation.js';
- import path from 'path';
- import { fileURLToPath } from 'url';
- const __filename = fileURLToPath(import.meta.url);
- const __dirname = path.dirname(__filename);
/**
 * Responds with the fixed catalogue of books available to the reader.
 *
 * @param {object} ctx - Request context; only `ctx.body` is written.
 * @returns {Promise<void>} Resolves after `ctx.body` has been set to
 *   `{ errcode: 10000, result: [{ Title }, ...] }`.
 */
export async function GetReaderBooks(ctx) {
  // Titles are listed in the order they are returned to the client.
  const titles = [
    "Strange-Life-of-a-Cat",
    "Throne-of-Magical-Arcana",
    "Zhui-Xu",
    "Hidden-Assassin",
    "Release-that-Witch",
    "The-Legendary-Mechanic",
  ];
  const result = titles.map((Title) => ({ Title }));
  ctx.body = { "errcode": 10000, result };
}
/**
 * Lists the chapter titles of one book by scanning its `<Title>_contents`
 * directory under `web_crawler/`.
 *
 * Only `*.html` files are considered. They are ordered by the integer that
 * precedes the first `_` in the file name, and the reported title is the text
 * between the last `_` and the `.html` suffix (the whole file name when that
 * pattern does not match).
 *
 * @param {object} ctx - Request context; reads `ctx.query.Title`, writes `ctx.body`.
 * @returns {Promise<void>} Resolves after `ctx.body` has been set to
 *   `{ errcode: 10000, result: string[] }`.
 * @throws {Error} When `Title` is missing or is not a single path segment, and
 *   whatever `fs.promises.readdir` throws when the directory cannot be read.
 */
export async function GetReaderBooksChapter(ctx) {
  const title = ctx.query.Title || "";

  // Title comes straight from the query string and becomes a directory name,
  // so anything that could step outside web_crawler/ is refused up front.
  const isSingleSegment =
    typeof title === 'string' &&
    title !== '' &&
    title !== '.' &&
    title !== '..' &&
    !/[\\/\0]/.test(title);
  if (!isSingleSegment) {
    throw new Error(`Invalid Title: ${JSON.stringify(title)}`);
  }

  const sourceDir = path.join(__dirname, '../../web_crawler', title, `${title}_contents`);
  const entries = await fs.promises.readdir(sourceDir);

  // File names look like "<number>_<chapter title>.html".
  const leadingNumber = (name) => Number.parseInt(name.split('_')[0], 10);

  const result = entries
    .filter((name) => name.endsWith('.html'))
    .sort((a, b) => leadingNumber(a) - leadingNumber(b))
    .map((name) => {
      const titleMatch = name.match(/_([^.]+)\.html$/);
      return titleMatch ? titleMatch[1] : name;
    });

  ctx.body = { "errcode": 10000, result };
}
/**
 * Turns one line of the book's HTML into plain text.
 *
 * `<br>` becomes a newline, non-breaking spaces (entity or raw U+00A0) become
 * ordinary spaces, `<h2>`/`<p>` tags become newlines, every remaining tag is
 * dropped, and the result is trimmed. Finally a space is inserted after any
 * `.`, `!` or `?` that is directly followed by a non-space character so the
 * sentence splitter can rely on "punctuation + whitespace".
 *
 * @param {string} line - One raw line from the HTML file.
 * @returns {string} Plain text, possibly empty.
 */
function readerHtmlLineToText(line) {
  const text = line
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/&nbsp;|\u00a0/g, ' ')
    .replace(/<\/?h2>/g, '\n\n')
    .replace(/<\/?p>/g, '\n')
    .replace(/<[^>]+>/g, '')
    .trim();
  return text.replace(/([.!?])(?=\S)/g, '$1 ');
}

/**
 * Splits a paragraph of plain text into display pieces.
 *
 * The text is first cut after sentence-ending punctuation followed by
 * whitespace. Inside each sentence every `"` becomes its own element; text in
 * front of a quote is emitted trimmed, and the text after the last quote (or
 * the whole sentence when it has no quote) is emitted trimmed with exactly one
 * trailing space.
 *
 * @param {string} text - Output of `readerHtmlLineToText`.
 * @returns {string[]} Sentence fragments and standalone `"` markers.
 */
function splitReaderParagraph(text) {
  const pieces = [];
  for (const sentence of text.split(/(?<=[.!?])\s+/)) {
    if (!sentence.trim()) {
      continue;
    }
    const segments = sentence.split('"');
    const lastIndex = segments.length - 1;
    segments.forEach((segment, index) => {
      const trimmed = segment.trim();
      if (index < lastIndex) {
        // Text preceding a quote mark, then the quote mark itself.
        if (trimmed) {
          pieces.push(trimmed);
        }
        pieces.push('"');
      } else if (trimmed) {
        // Trailing text always carries a single separating space.
        pieces.push(`${trimmed} `);
      }
    });
  }
  return pieces;
}

/**
 * Returns the body of one chapter as an array of paragraphs, each paragraph
 * being an array of sentence fragments (see `splitReaderParagraph`).
 *
 * The book is read from `web_crawler/<Title>/<Title>.html`. Collection starts
 * at the line that is exactly `<h2>Chapter <Chapter></h2>` and stops at the
 * next line containing `<h2>Chapter`. The heading paragraph itself is removed
 * from the result. When the heading is not found the result is empty.
 *
 * @param {object} ctx - Request context; reads `ctx.query.Title` and
 *   `ctx.query.Chapter` (defaults to `"0"`), writes `ctx.body`.
 * @returns {Promise<void>} Resolves after `ctx.body` has been set to
 *   `{ errcode: 10000, result: string[][] }`.
 * @throws {Error} When `Title` is missing or is not a single path segment, and
 *   whatever `fs.promises.readFile` throws when the file cannot be read.
 */
export async function GetReaderBooksChapterContent(ctx) {
  const title = ctx.query.Title || "";
  const chapter = ctx.query.Chapter || "0";

  // Title comes straight from the query string and becomes part of a file
  // path, so anything that could step outside web_crawler/ is refused.
  const isSingleSegment =
    typeof title === 'string' &&
    title !== '' &&
    title !== '.' &&
    title !== '..' &&
    !/[\\/\0]/.test(title);
  if (!isSingleSegment) {
    throw new Error(`Invalid Title: ${JSON.stringify(title)}`);
  }

  // Read the whole book.
  const filePath = path.join(__dirname, '../../web_crawler', title, `${title}.html`);
  const content = await fs.promises.readFile(filePath, 'utf-8');
  console.log(`已读取文件: ${filePath}`);

  const heading = `<h2>Chapter ${chapter}</h2>`;
  const paragraphs = [];
  let inChapter = false;

  // Accept both LF and CRLF files; a stray "\r" would otherwise defeat the
  // exact heading comparison below.
  for (const line of content.split(/\r?\n/)) {
    if (inChapter && line.includes('<h2>Chapter')) {
      break; // Reached the next chapter heading.
    }
    if (line === heading) {
      inChapter = true;
    }
    if (!inChapter) {
      continue;
    }

    const text = readerHtmlLineToText(line);
    if (!text) {
      continue;
    }

    // Only paragraphs that produced at least one piece are kept.
    const sentences = splitReaderParagraph(text);
    if (sentences.length > 0) {
      paragraphs.push(sentences);
    }
  }

  // The first collected paragraph is the chapter heading itself; drop it.
  ctx.body = { "errcode": 10000, result: paragraphs.slice(1) };
}
|