chengjie преди 4 месеца
родител
ревизия
7cff2d4832
променени са 5 файла, в които са добавени 236 реда и са изтрити 366 реда
  1. 38 366
      src/api/yjbdc/aiController.js
  2. 167 0
      src/api/yjbdc/enhanceFormsOfWords.js
  3. 6 0
      src/api/yjbdc/yjbdcController.js
  4. 22 0
      test-enhance.mjs
  5. 3 0
      test-syntax.mjs

+ 38 - 366
src/api/yjbdc/aiController.js

@@ -1,5 +1,6 @@
1 1
 import axios from 'axios';
2 2
 import config from '../../config/index.js';
3
+import { enhanceFormsOfWords } from './enhanceFormsOfWords.js';
3 4
 
4 5
 /**
5 6
  * AI平台接口类
@@ -217,7 +218,7 @@ class AIProviderFactory {
217 218
  * @param {string} provider - AI提供者名称,默认为'volces'
218 219
  * @returns {Promise<string>} - 返回生成的文章JSON字符串
219 220
  */
220
-export async function generateArticle(content, provider = 'volces1-5') {
221
+async function generateArticle(content, provider = 'volces1-5') {
221 222
     try {
222 223
         const aiProvider = AIProviderFactory.getProvider(provider);
223 224
         const result = await aiProvider.generateArticle(content);
@@ -229,381 +230,52 @@ export async function generateArticle(content, provider = 'volces1-5') {
229 230
 }
230 231
 
231 232
 /**
232
- * 计算两个字符串之间的Levenshtein距离(编辑距离)
233
- * @param {string} a - 第一个字符串
234
- * @param {string} b - 第二个字符串
235
- * @returns {number} - 编辑距离
233
+ * 验证并修复JSON结构
234
+ * @param {string} jsonString - 需要验证和修复的JSON字符串
235
+ * @returns {string} - 返回修复后的JSON字符串
236 236
  */
237
-function levenshteinDistance(a, b) {
238
-    const matrix = [];
239
-    
240
-    // 初始化矩阵
241
-    for (let i = 0; i <= b.length; i++) {
242
-        matrix[i] = [i];
243
-    }
244
-    
245
-    for (let j = 0; j <= a.length; j++) {
246
-        matrix[0][j] = j;
247
-    }
248
-    
249
-    // 填充矩阵
250
-    for (let i = 1; i <= b.length; i++) {
251
-        for (let j = 1; j <= a.length; j++) {
252
-            if (b.charAt(i - 1) === a.charAt(j - 1)) {
253
-                matrix[i][j] = matrix[i - 1][j - 1];
254
-            } else {
255
-                matrix[i][j] = Math.min(
256
-                    matrix[i - 1][j - 1] + 1, // 替换
257
-                    matrix[i][j - 1] + 1,     // 插入
258
-                    matrix[i - 1][j] + 1      // 删除
259
-                );
260
-            }
261
-        }
262
-    }
263
-    
264
-    return matrix[b.length][a.length];
265
-}
266
-
267
-/**
268
- * 增强FormsOfWords,检测文章中单词的变形形式和拼写错误
269
- * @param {Object} jsonObj - 解析后的JSON对象
270
- * @param {string} userWords - 用户提供的单词列表,逗号分隔
271
- * @returns {Object} - 增强后的JSON对象
272
- */
273
-export function enhanceFormsOfWords(jsonObj, userWords) {
274
-    if (!jsonObj || !userWords) return jsonObj;
275
-    
276
-    // 将用户提供的单词转换为数组并去除空格
277
-    const userWordsList = userWords.split(',').map(word => word.trim().toLowerCase());
278
-    
279
-    // 如果没有ArticleEnglish或FormsOfWords,直接返回
280
-    if (!jsonObj.ArticleEnglish || !Array.isArray(jsonObj.ArticleEnglish)) {
281
-        return jsonObj;
282
-    }
283
-    
284
-    // 确保FormsOfWords存在
285
-    if (!jsonObj.FormsOfWords) {
286
-        jsonObj.FormsOfWords = [];
287
-    }
288
-    
289
-    // 从文章中提取所有单词
290
-    const allWordsInArticle = [];
291
-    jsonObj.ArticleEnglish.forEach(sentence => {
292
-        // 移除标点符号,分割成单词
293
-        const words = sentence.toLowerCase()
294
-            .replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, " ")
295
-            .replace(/\s+/g, " ")
296
-            .split(" ");
297
-        
298
-        words.forEach(word => {
299
-            if (word) allWordsInArticle.push(word.trim());
300
-        });
301
-    });
302
-    
303
-    // 常见的后缀列表
304
-    const commonSuffixes = [
305
-        'ed', 'ing', 's', 'es', 'er', 'est', 'ful', 'ly', 'ment', 'ness', 'ity', 
306
-        'tion', 'sion', 'ation', 'able', 'ible', 'al', 'ial', 'ic', 'ical', 'ious', 
307
-        'ous', 'ive', 'less', 'y'
308
-    ];
309
-    
310
-    // 不规则动词变化表(部分常见的)
311
-    const irregularVerbs = {
312
-        'go': ['went', 'gone', 'goes', 'going'],
313
-        'be': ['am', 'is', 'are', 'was', 'were', 'been', 'being'],
314
-        'do': ['did', 'done', 'does', 'doing'],
315
-        'have': ['has', 'had', 'having'],
316
-        'say': ['said', 'says', 'saying'],
317
-        'make': ['made', 'makes', 'making'],
318
-        'take': ['took', 'taken', 'takes', 'taking'],
319
-        'come': ['came', 'comes', 'coming'],
320
-        'see': ['saw', 'seen', 'sees', 'seeing'],
321
-        'know': ['knew', 'known', 'knows', 'knowing'],
322
-        'get': ['got', 'gotten', 'gets', 'getting'],
323
-        'give': ['gave', 'given', 'gives', 'giving'],
324
-        'find': ['found', 'finds', 'finding'],
325
-        'think': ['thought', 'thinks', 'thinking'],
326
-        'tell': ['told', 'tells', 'telling'],
327
-        'become': ['became', 'becomes', 'becoming'],
328
-        'show': ['showed', 'shown', 'shows', 'showing'],
329
-        'leave': ['left', 'leaves', 'leaving'],
330
-        'feel': ['felt', 'feels', 'feeling'],
331
-        'put': ['puts', 'putting'],
332
-        'bring': ['brought', 'brings', 'bringing'],
333
-        'begin': ['began', 'begun', 'begins', 'beginning'],
334
-        'keep': ['kept', 'keeps', 'keeping'],
335
-        'hold': ['held', 'holds', 'holding'],
336
-        'write': ['wrote', 'written', 'writes', 'writing'],
337
-        'stand': ['stood', 'stands', 'standing'],
338
-        'hear': ['heard', 'hears', 'hearing'],
339
-        'let': ['lets', 'letting'],
340
-        'mean': ['meant', 'means', 'meaning'],
341
-        'set': ['sets', 'setting'],
342
-        'meet': ['met', 'meets', 'meeting'],
343
-        'run': ['ran', 'runs', 'running'],
344
-        'pay': ['paid', 'pays', 'paying'],
345
-        'sit': ['sat', 'sits', 'sitting'],
346
-        'speak': ['spoke', 'spoken', 'speaks', 'speaking'],
347
-        'lie': ['lay', 'lain', 'lies', 'lying'],
348
-        'lead': ['led', 'leads', 'leading'],
349
-        'read': ['read', 'reads', 'reading'],
350
-        'sleep': ['slept', 'sleeps', 'sleeping'],
351
-        'win': ['won', 'wins', 'winning'],
352
-        'understand': ['understood', 'understands', 'understanding'],
353
-        'draw': ['drew', 'drawn', 'draws', 'drawing'],
354
-        'sing': ['sang', 'sung', 'sings', 'singing'],
355
-        'fall': ['fell', 'fallen', 'falls', 'falling'],
356
-        'fly': ['flew', 'flown', 'flies', 'flying'],
357
-        'grow': ['grew', 'grown', 'grows', 'growing'],
358
-        'lose': ['lost', 'loses', 'losing'],
359
-        'teach': ['taught', 'teaches', 'teaching'],
360
-        'eat': ['ate', 'eaten', 'eats', 'eating'],
361
-        'drink': ['drank', 'drunk', 'drinks', 'drinking']
362
-    };
363
-    
364
-    // 不规则形容词比较级和最高级
365
-    const irregularAdjectives = {
366
-        'good': ['better', 'best'],
367
-        'bad': ['worse', 'worst'],
368
-        'far': ['further', 'furthest', 'farther', 'farthest'],
369
-        'little': ['less', 'least'],
370
-        'many': ['more', 'most'],
371
-        'much': ['more', 'most']
372
-    };
373
-    
374
-    // 收集所有单词形式
375
-    const allForms = new Set();
376
-    
377
-    userWordsList.forEach(originalWord => {
378
-        // 添加原始单词
379
-        allForms.add(originalWord);
380
-        
381
-        // 检查不规则动词
382
-        if (irregularVerbs[originalWord]) {
383
-            irregularVerbs[originalWord].forEach(form => allForms.add(form));
384
-        }
385
-        
386
-        // 检查不规则形容词
387
-        if (irregularAdjectives[originalWord]) {
388
-            irregularAdjectives[originalWord].forEach(form => allForms.add(form));
389
-        }
390
-        
391
-        // 检查文章中的所有单词,寻找可能的变形和拼写错误
392
-        allWordsInArticle.forEach(articleWord => {
393
-            // 检查是否是原始单词
394
-            if (articleWord === originalWord) {
395
-                allForms.add(articleWord);
396
-                return;
397
-            }
398
-            
399
-            // 检查拼写错误,使用更严格的条件
400
-            // 1. 对于短单词(长度<=4),只接受编辑距离为1的情况
401
-            // 2. 对于中等长度单词(4<长度<=8),只接受编辑距离为1的情况
402
-            // 3. 对于长单词(长度>8),允许编辑距离为2,但有额外限制
403
-            if (originalWord.length >= 5 && originalWord.length <= 8) {
404
-                // 短单词和中等长度单词使用相同的严格条件
405
-                if (articleWord[0] === originalWord[0] && // 首字母必须相同
406
-                    Math.abs(articleWord.length - originalWord.length) <= 1 && // 长度差不超过1
407
-                    levenshteinDistance(originalWord, articleWord) === 1) { // 编辑距离恰好为1
408
-                    allForms.add(articleWord);
409
-                }
410
-            } else {
411
-                // 长单词(长度>8)的条件
412
-                if (articleWord[0] === originalWord[0]) { // 首字母必须相同
413
-                    const editDistance = levenshteinDistance(originalWord, articleWord);
414
-                    
415
-                    if (editDistance === 1) {
416
-                        // 编辑距离为1的情况,长度差不超过1
417
-                        if (Math.abs(articleWord.length - originalWord.length) <= 1) {
418
-                            allForms.add(articleWord);
419
-                        }
420
-                    } else if (editDistance === 2) {
421
-                        // 编辑距离为2的情况,需要更严格的条件
422
-                        // 长度差不超过1且单词长度大于8
423
-                        if (Math.abs(articleWord.length - originalWord.length) <= 1) {
424
-                            allForms.add(articleWord);
425
-                        }
426
-                    }
427
-                }
428
-            }
429
-            
430
-            // 检查是否是通过添加后缀形成的变形
431
-            for (const suffix of commonSuffixes) {
432
-                if (articleWord.endsWith(suffix)) {
433
-                    const stem = articleWord.slice(0, -suffix.length);
434
-                    
435
-                    // 处理双写字母的情况(如:running -> run)
436
-                    if (stem.length > 0 && stem[stem.length-1] === stem[stem.length-2]) {
437
-                        const possibleStem = stem.slice(0, -1);
438
-                        if (possibleStem === originalWord) {
439
-                            allForms.add(articleWord);
440
-                            continue;
441
-                        }
442
-                    }
443
-                    
444
-                    // 处理去e加ing的情况(如:writing -> write)
445
-                    if (suffix === 'ing' && originalWord.endsWith('e') && stem + 'e' === originalWord) {
446
-                        allForms.add(articleWord);
447
-                        continue;
448
-                    }
449
-                    
450
-                    // 处理y变i的情况(如:studies -> study)
451
-                    if ((suffix === 'es' || suffix === 'ed') && originalWord.endsWith('y') && 
452
-                        stem + 'y' === originalWord) {
453
-                        allForms.add(articleWord);
454
-                        continue;
455
-                    }
456
-                    
457
-                    // 直接比较
458
-                    if (stem === originalWord) {
459
-                        allForms.add(articleWord);
460
-                    }
461
-                }
462
-            }
463
-        });
464
-    });
465
-    
466
-    // 更新FormsOfWords
467
-    jsonObj.FormsOfWords = Array.from(new Set([...jsonObj.FormsOfWords, ...allForms]));
468
-    
469
-    return jsonObj;
470
-}
471
-
472
-/**
473
- * 校验和修复JSON结构
474
- * @param {string} jsonString - JSON字符串
475
- * @returns {string} - 修复后的JSON字符串
476
- */
477
-export function validateAndFixJSON(jsonString) {
237
+function validateAndFixJSON(jsonString) {
478 238
     try {
479
-        //console.log(jsonString);
480
-        // 解析JSON字符串为对象
481
-        let jsonObj = JSON.parse(jsonString);
239
+        // 尝试解析JSON
240
+        const parsed = JSON.parse(jsonString);
241
+        return jsonString;
242
+    } catch (error) {
243
+        console.error("JSON解析错误,尝试修复:", error);
482 244
         
483
-        // 校验和修复Question数组中的每个问题对象
484
-        if (jsonObj.Question && Array.isArray(jsonObj.Question)) {
485
-            jsonObj.Question = jsonObj.Question.map(question => {
486
-                // 创建一个修复后的问题对象
487
-                const fixedQuestion = {};
488
-                
489
-                // 确保QuestionEnglish字段存在
490
-                if (question.QuestionEnglish) {
491
-                    fixedQuestion.QuestionEnglish = question.QuestionEnglish;
492
-                }
493
-                
494
-                // 检查QuestionChinese字段,如果不存在但有第二个QuestionEnglish,则使用它
495
-                if (question.QuestionChinese) {
496
-                    fixedQuestion.QuestionChinese = question.QuestionChinese;
497
-                } else if (Object.keys(question).filter(key => key === 'QuestionEnglish').length > 1) {
498
-                    // 找到第二个QuestionEnglish的值
499
-                    const keys = Object.keys(question);
500
-                    let foundFirst = false;
501
-                    for (const key of keys) {
502
-                        if (key === 'QuestionEnglish') {
503
-                            if (foundFirst) {
504
-                                fixedQuestion.QuestionChinese = question[key];
505
-                                break;
506
-                            }
507
-                            foundFirst = true;
508
-                        }
509
-                    }
510
-                }
511
-                
512
-                // 确保OptionsEnglish字段存在且为数组
513
-                if (question.OptionsEnglish && Array.isArray(question.OptionsEnglish)) {
514
-                    fixedQuestion.OptionsEnglish = question.OptionsEnglish;
515
-                } else {
516
-                    fixedQuestion.OptionsEnglish = ["A.", "B.", "C.", "D."];
517
-                }
518
-                
519
-                // 确保OptionsChinese字段存在且为数组
520
-                if (question.OptionsChinese && Array.isArray(question.OptionsChinese)) {
521
-                    fixedQuestion.OptionsChinese = question.OptionsChinese;
522
-                } else {
523
-                    fixedQuestion.OptionsChinese = ["A.", "B.", "C.", "D."];
524
-                }
525
-                
526
-                // 确保Answer字段存在
527
-                if (question.Answer) {
528
-                    fixedQuestion.Answer = question.Answer;
529
-                } else {
530
-                    fixedQuestion.Answer = "A";
531
-                }
532
-                
533
-                return fixedQuestion;
534
-            });
535
-        }
245
+        // 尝试修复常见的JSON错误
246
+        let fixedJson = jsonString;
536 247
         
537
-        // 确保其他必要字段存在
538
-        if (!jsonObj.ArticleEnglish || !Array.isArray(jsonObj.ArticleEnglish)) {
539
-            jsonObj.ArticleEnglish = ["No content available"];
540
-        }
248
+        // 修复缺少引号的键
249
+        fixedJson = fixedJson.replace(/(\s*?)(\w+)(\s*?):/g, '"$2":');
541 250
         
542
-        if (!jsonObj.ArticleChinese || !Array.isArray(jsonObj.ArticleChinese)) {
543
-            jsonObj.ArticleChinese = ["无可用内容"];
544
-        }
251
+        // 修复单引号
252
+        fixedJson = fixedJson.replace(/'/g, '"');
545 253
         
546
-        if (!jsonObj.FormsOfWords || !Array.isArray(jsonObj.FormsOfWords)) {
547
-            jsonObj.FormsOfWords = [];
548
-        } else {
549
-            // 处理FormsOfWords数组,提取所有单词
550
-            const processedFormsOfWords = [];
551
-            
552
-            for (const item of jsonObj.FormsOfWords) {
553
-                if (typeof item !== 'string') {
554
-                    continue; // 跳过非字符串项
555
-                }
556
-                
557
-                // 处理冒号分隔格式:"word1: word2"
558
-                if (item.includes(':')) {
559
-                    const [leftWord, rightWord] = item.split(':').map(word => word.trim());
560
-                    if (leftWord) processedFormsOfWords.push(leftWord);
561
-                    if (rightWord) processedFormsOfWords.push(rightWord);
562
-                    continue;
563
-                }
564
-                
565
-                // 处理括号分隔格式:"word1(word2)" 或 "word1(word2, word3)"
566
-                const bracketMatch = item.match(/^([^(]+)\(([^)]+)\)$/);
567
-                if (bracketMatch) {
568
-                    const outsideWord = bracketMatch[1].trim();
569
-                    const insideWords = bracketMatch[2].split(',').map(word => word.trim());
570
-                    
571
-                    if (outsideWord) processedFormsOfWords.push(outsideWord);
572
-                    for (const word of insideWords) {
573
-                        if (word) processedFormsOfWords.push(word);
574
-                    }
575
-                    continue;
576
-                }
577
-                
578
-                // 如果不符合上述格式,检查是否包含逗号
579
-                if (item.includes(',')) {
580
-                    // 如果包含逗号,按逗号分割并添加每个单词
581
-                    const words = item.split(',').map(word => word.trim());
582
-                    for (const word of words) {
583
-                        if (word) processedFormsOfWords.push(word);
584
-                    }
585
-                } else {
586
-                    // 单个单词,直接添加
587
-                    processedFormsOfWords.push(item);
588
-                }
589
-            }
590
-            
591
-            // 去除空字符串并去重
592
-            const uniqueFormsOfWords = [...new Set(processedFormsOfWords.filter(word => word))];
593
-            
594
-            // 用去重后的数组替换原数组
595
-            jsonObj.FormsOfWords = uniqueFormsOfWords;
596
-        }
254
+        // 修复尾部逗号
255
+        fixedJson = fixedJson.replace(/,\s*}/g, '}');
256
+        fixedJson = fixedJson.replace(/,\s*\]/g, ']');
597 257
         
598
-        // 将修复后的对象转回JSON字符串
599
-        return JSON.stringify(jsonObj);
600
-    } catch (jsonError) {
601
-        console.error("JSON解析或修复错误:", jsonError);
602
-        // 如果解析失败,保留原始结果
603
-        return jsonString;
258
+        // 尝试解析修复后的JSON
259
+        try {
260
+            JSON.parse(fixedJson);
261
+            console.log("JSON修复成功");
262
+            return fixedJson;
263
+        } catch (error2) {
264
+            console.error("JSON修复失败:", error2);
265
+            // 如果修复失败,返回原始字符串
266
+            return jsonString;
267
+        }
604 268
     }
605 269
 }
606 270
 
271
+// 导出所有函数
272
+export {
273
+    generateArticle,
274
+    enhanceFormsOfWords, 
275
+    validateAndFixJSON
276
+};
277
+
278
+// 默认导出,保持向后兼容性
607 279
 export default {
608 280
     generateArticle,
609 281
     enhanceFormsOfWords,

+ 167 - 0
src/api/yjbdc/enhanceFormsOfWords.js

@@ -0,0 +1,167 @@
1
+/**
2
+ * 计算两个字符串之间的Levenshtein距离(编辑距离)
3
+ * @param {string} a - 第一个字符串
4
+ * @param {string} b - 第二个字符串
5
+ * @returns {number} - 编辑距离
6
+ */
7
+function levenshteinDistance(a, b) {
8
+    if (a.length === 0) return b.length;
9
+    if (b.length === 0) return a.length;
10
+    
11
+    const matrix = [];
12
+    
13
+    // 初始化矩阵
14
+    for (let i = 0; i <= b.length; i++) {
15
+        matrix[i] = [i];
16
+    }
17
+    
18
+    for (let j = 0; j <= a.length; j++) {
19
+        matrix[0][j] = j;
20
+    }
21
+    
22
+    // 填充矩阵
23
+    for (let i = 1; i <= b.length; i++) {
24
+        for (let j = 1; j <= a.length; j++) {
25
+            if (b.charAt(i - 1) === a.charAt(j - 1)) {
26
+                matrix[i][j] = matrix[i - 1][j - 1];
27
+            } else {
28
+                matrix[i][j] = Math.min(
29
+                    matrix[i - 1][j - 1] + 1, // 替换
30
+                    matrix[i][j - 1] + 1,     // 插入
31
+                    matrix[i - 1][j] + 1      // 删除
32
+                );
33
+            }
34
+        }
35
+    }
36
+    
37
+    return matrix[b.length][a.length];
38
+}
39
+
40
+/**
41
+ * 增强FormsOfWords,检测文章中单词的变形形式和拼写错误
42
+ * @param {Object} jsonObj - 解析后的JSON对象
43
+ * @param {string} userWords - 用户提供的单词列表,逗号分隔
44
+ * @returns {Object} - 增强后的JSON对象,其中FormsOfWords是字符串数组
45
+ */
46
+export function enhanceFormsOfWords(jsonObj, userWords) {
47
+    if (!jsonObj || !userWords) return jsonObj;
48
+    
49
+    // 将用户提供的单词转换为数组并去除空格
50
+    const userWordsList = userWords.split(',').map(word => word.trim().toLowerCase()).filter(word => word);
51
+    
52
+    // 如果没有用户单词或没有ArticleEnglish,直接返回
53
+    if (userWordsList.length === 0 || !jsonObj.ArticleEnglish || !Array.isArray(jsonObj.ArticleEnglish)) {
54
+        return jsonObj;
55
+    }
56
+    
57
+    // 创建FormsOfWords数组,首先包含所有用户输入的单词
58
+    const formsOfWordsArray = [...userWordsList];
59
+    
60
+    // 存储原始句子和小写句子的映射,用于保留大小写
61
+    const originalSentences = {};
62
+    const allSentencesLower = [];
63
+    
64
+    // 从文章中提取所有单词
65
+    const allWordsInArticle = [];
66
+    
67
+    jsonObj.ArticleEnglish.forEach(sentence => {
68
+        // 保存原始句子
69
+        const sentenceLower = sentence.toLowerCase();
70
+        originalSentences[sentenceLower] = sentence;
71
+        allSentencesLower.push(sentenceLower);
72
+        
73
+        // 移除标点符号,分割成单词
74
+        const words = sentenceLower
75
+            .replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, " ")
76
+            .replace(/\s+/g, " ")
77
+            .split(" ");
78
+        
79
+        words.forEach(word => {
80
+            if (word) allWordsInArticle.push(word.trim());
81
+        });
82
+    });
83
+    
84
+    // 常见的后缀列表
85
+    const commonSuffixes = ['ed', 'ing', 's', 'es', 'er', 'est', 'ly'];
86
+    
87
+    // 处理用户提供的每个单词
88
+    userWordsList.forEach(originalInput => {
89
+        // 跳过空输入
90
+        if (!originalInput) return;
91
+        
92
+        // 在文章中查找匹配和相似的单词
93
+        const matchedForms = [];
94
+        
95
+        allWordsInArticle.forEach(articleWord => {
96
+            // 跳过空单词
97
+            if (!articleWord) return;
98
+            
99
+            // 检查是否应该添加这个单词
100
+            let shouldAdd = false;
101
+            
102
+            // 完全匹配
103
+            if (articleWord === originalInput) {
104
+                shouldAdd = true;
105
+            } 
106
+            // 前缀匹配,但要求前缀至少3个字符
107
+            else if ((articleWord.startsWith(originalInput) && originalInput.length >= 3) || 
108
+                     (originalInput.startsWith(articleWord) && articleWord.length >= 3)) {
109
+                shouldAdd = true;
110
+            } 
111
+            // 编辑距离检查,但要求首字母相同且长度相近
112
+            else if (articleWord[0] === originalInput[0] && 
113
+                     Math.abs(articleWord.length - originalInput.length) <= 2 &&
114
+                     levenshteinDistance(articleWord, originalInput) <= 1) {
115
+                shouldAdd = true;
116
+            }
117
+            // 检查常见后缀
118
+            else {
119
+                for (const suffix of commonSuffixes) {
120
+                    if (articleWord === originalInput + suffix) {
121
+                        shouldAdd = true;
122
+                        break;
123
+                    }
124
+                }
125
+            }
126
+            
127
+            if (shouldAdd) {
128
+                // 在原始句子中查找单词,以保留大小写
129
+                let originalForm = null;
130
+                
131
+                for (const sentenceLower of allSentencesLower) {
132
+                    const originalSentence = originalSentences[sentenceLower];
133
+                    // 使用正则表达式查找单词,确保匹配完整单词而不是部分
134
+                    const wordRegex = new RegExp(`\\b${articleWord.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&')}\\b`, 'i');
135
+                    const match = originalSentence.match(wordRegex);
136
+                    
137
+                    if (match) {
138
+                        originalForm = match[0];
139
+                        break;
140
+                    }
141
+                }
142
+                
143
+                // 如果找不到原始形式,使用小写形式
144
+                if (!originalForm) {
145
+                    originalForm = articleWord;
146
+                }
147
+                
148
+                // 添加到匹配形式列表
149
+                if (!matchedForms.includes(originalForm)) {
150
+                    matchedForms.push(originalForm);
151
+                }
152
+            }
153
+        });
154
+        
155
+        // 将找到的匹配形式添加到结果数组中
156
+        matchedForms.forEach(form => {
157
+            if (!formsOfWordsArray.includes(form)) {
158
+                formsOfWordsArray.push(form);
159
+            }
160
+        });
161
+    });
162
+    
163
+    // 将结果数组赋值给jsonObj.FormsOfWords
164
+    jsonObj.FormsOfWords = formsOfWordsArray;
165
+    
166
+    return jsonObj;
167
+}

+ 6 - 0
src/api/yjbdc/yjbdcController.js

@@ -284,8 +284,14 @@ export async function GenerateArticle(ctx) {
284 284
                 // 将增强后的对象转回JSON字符串
285 285
                 result2 = JSON.stringify(enhancedJsonObj);
286 286
                 console.log("FormsOfWords已增强,添加了单词变形和拼写错误检测");
287
+                
288
+                // 记录增强后的单词数量
289
+                if (enhancedJsonObj.FormsOfWords && Array.isArray(enhancedJsonObj.FormsOfWords)) {
290
+                    console.log(`增强了${enhancedJsonObj.FormsOfWords.length}个单词的变形形式`);
291
+                }
287 292
             } catch (error) {
288 293
                 console.error("增强FormsOfWords时出错:", error);
294
+                console.error(error.stack); // 添加堆栈跟踪以便更好地调试
289 295
             }
290 296
             
291 297
             let param2={};

+ 22 - 0
test-enhance.mjs

@@ -0,0 +1,22 @@
1
+import { enhanceFormsOfWords } from './src/api/yjbdc/aiController.js';
2
+
3
+// 创建一个测试用例
4
+const testJson = {
5
+  ArticleEnglish: [
6
+    "The quick brown fox jumps over the lazy dog.",
7
+    "She is running in the park every morning.",
8
+    "They have been working on this project for months.",
9
+    "I went to the store yesterday and bought some groceries.",
10
+    "The children are playing with their new toys."
11
+  ],
12
+  FormsOfWords: []
13
+};
14
+
15
+// 测试单词列表
16
+const testWords = "fox, run, go, play, work";
17
+
18
+// 调用函数
19
+const result = enhanceFormsOfWords(testJson, testWords);
20
+
21
+// 打印结果
22
+console.log(JSON.stringify(result, null, 2));

+ 3 - 0
test-syntax.mjs

@@ -0,0 +1,3 @@
1
+import { enhanceFormsOfWords } from './src/api/yjbdc/aiController.js';
2
+
3
+console.log('函数语法检查通过');