chengjie 2 ay önce
ebeveyn
işleme
ed18df9b31

+ 358 - 13
src/api/miaoguo/literacyController.js

@@ -127,9 +127,82 @@ export async function GetMiaoguoAISearch(ctx) {
127 127
         else{
128 128
             console.log("New Word");
129 129
             isNew=true;
130
+            //如果是古诗词
131
+            if (param.ShiciUrl){
132
+                console.log("古诗词");
133
+                let url2=param.ShiciUrl;
134
+                result = await axios.get(url2)
135
+                    .then(response => {
136
+                        let htmlString=response.data;
137
+                        //若是古诗词
138
+                        if (htmlString.indexOf("poem-detail-body")>0){
139
+                            let peom=getPeom(htmlString);
140
+                            param.ShiciTitle = "《" + peom.ShiciTitle + "》";
141
+                            if (peom.Dynasty)
142
+                                peom.Dynasty="["+peom.Dynasty+"]";
143
+
144
+                            //判断诗词单词是否过长,过长就分段显示
145
+                            if (peom.PeomContent) {
146
+                                peom.PeomContentIsLong = false;
147
+                                if (peom.PeomContent.length > 1 ) {
148
+                                    for (let i = 0; i < peom.PeomContent.length; i++) {
149
+                                        if (peom.PeomContent[i]){
150
+                                            let item = peom.PeomContent[i].join("");
151
+                                            if (item && item.length>18){
152
+                                                peom.PeomContentIsLong = true;
153
+                                                break;
154
+                                            }
155
+                                        }
156
+                                    }
157
+                                }
158
+                                else if (peom.PeomContent.length == 1) {
159
+
160
+                                    for (let i = 0; i < peom.PeomContent[0].length; i++) {
161
+                                        let item = peom.PeomContent[0][i];
162
+                                        if (item && item.length > 18) {
163
+                                            peom.PeomContentIsLong = true;
164
+                                            break;
165
+                                        }
166
+                                    }
167
+                                }
168
+                            }
169
+
170
+                            return {
171
+                                CHN:{
172
+                                    HanZi: param.Word,
173
+                                    ShiciTitle:param.ShiciTitle,
174
+                                    Author:peom.Author,
175
+                                    Dynasty:peom.Dynasty,
176
+                                    PeomContent:peom.PeomContent,
177
+                                    PeomContentIsLong:peom.PeomContentIsLong,
178
+                                    Translation:peom.Translation,
179
+                                }
180
+                            };
181
+                        }
182
+                        else{
183
+                            return null;
184
+                        }
185
+                    })
186
+                    .catch(err => {
187
+                        return null;
188
+                    });
189
+
190
+                let obj={};
191
+                obj.Word=param.Word;
192
+                obj.SearchType='shici';
193
+                obj.Author=result.Author;
194
+                obj.ShiciUrl=param.ShiciUrl;
195
+                obj.JSONString=JSON.stringify(result);
196
+                let sql3="INSERT INTO `MiaoguoLiteracy` SET ?;";
197
+                await commonModel.RunSql(obj,sql3);
198
+                result={"errcode": 10000, result: result};
199
+            }
130 200
             //如果是英文单词
131
-            if ((stringUtils.IsEnglish(param.Word)) || param.SearchType=="eng"){
132
-            
201
+            else if ((stringUtils.IsEnglish(param.Word)) || param.SearchType=="eng"){
202
+                
203
+                let result2=await getAiDataEng(param.Word);
204
+
205
+                result={"errcode": 10000, result: result2};
133 206
             }
134 207
             //整句中是否有中文
135 208
             else if ((stringUtils.IsChineseSentence(param.Word)) && param.SearchType!="eng") {
@@ -174,15 +247,13 @@ export async function GetMiaoguoAISearch(ctx) {
174 247
                     
175 248
                     let json=JSON.stringify(result);
176 249
 
177
-                    let sql3="INSERT INTO `MiaoguoLiteracy` SET ?;";
178
-                    //console.log(sql3);
179
-                    //console.log(json);
180 250
                     obj={};
181 251
                     obj.Word=param.Word;
182 252
                     obj.SearchType='zici';
183 253
                     obj.Author='';
184 254
                     obj.ShiciUrl='';
185 255
                     obj.JSONString=json;
256
+                    let sql3="INSERT INTO `MiaoguoLiteracy` SET ?;";
186 257
                     await commonModel.RunSql(obj,sql3);
187 258
 
188 259
                     globalCache.set(cacheKey, result, config.BufferMemoryTimeHigh);
@@ -196,7 +267,7 @@ export async function GetMiaoguoAISearch(ctx) {
196 267
 }
197 268
 
198 269
 function getWordFrequency(id){
199
-    var result={};
270
+    let result={};
200 271
     if (id==0){
201 272
         result={Min:0,Max:0};
202 273
     }
@@ -204,15 +275,15 @@ function getWordFrequency(id){
204 275
         result={Min:1,Max:220};
205 276
     }
206 277
     else if (id<=2000){
207
-        var num=Math.floor(id/100)*100;
278
+        let num=Math.floor(id/100)*100;
208 279
         result={Min:num,Max:Number(num+100)};
209 280
     }
210 281
     else if (id<=10000){
211
-        var num=Math.floor(id/500)*500;
282
+        let num=Math.floor(id/500)*500;
212 283
         result={Min:num,Max:Number(num+500)};
213 284
     }
214 285
     else{
215
-        var num=Math.floor(id/1000)*1000;
286
+        let num=Math.floor(id/1000)*1000;
216 287
         result={Min:num,Max:Number(num+1000)};
217 288
     }
218 289
     return result;
@@ -222,11 +293,11 @@ function tiankongFun(word,pinyin){
222 293
     let TianKong = [];
223 294
     if (word.length <= 4 && pinyin ) {
224 295
         
225
-        var arrPinyin = pinyin.split(" ");
296
+        let arrPinyin = pinyin.split(" ");
226 297
         //console.log(arrPinyin);
227
-        for (var i = 0; i < word.length; i++) {
228
-            var str = stringUtils.ReplaceAllString(word,word[i],arrPinyin[i]);
229
-            //var str=param.Word.substring(0,i)+arrPinyin[i]+param.Word.substring(i+1);
298
+        for (let i = 0; i < word.length; i++) {
299
+            let str = stringUtils.ReplaceAllString(word,word[i],arrPinyin[i]);
300
+            //let str=param.Word.substring(0,i)+arrPinyin[i]+param.Word.substring(i+1);
230 301
             TianKong.push(str);
231 302
         }
232 303
         TianKong= _.uniq(TianKong);
@@ -260,3 +331,277 @@ async function getAiData(word){
260 331
     
261 332
     return result;
262 333
 }
334
+
335
/**
 * Ask the AI provider for dictionary-style data about an English word or phrase.
 *
 * Builds a JSON prompt (instruction plus the desired output shape), sends it
 * through `aiController.generateArticle`, parses the JSON reply and, when the
 * reply carries `ENG.Soundmark`, attaches British/American text-to-speech URLs.
 *
 * @param {string} word - English word or phrase to look up.
 * @returns {Promise<Object>} The parsed model reply (expected shape: `{ENG: {...}}`).
 * @throws {SyntaxError} When the provider reply is not valid JSON (propagated from JSON.parse).
 */
async function getAiDataEng(word){
    // The prompt is sent as a JSON string; `output_format` is a template the model is asked to fill in.
    const prompt = JSON.stringify({
        "instruction": "Generate information about the English \""+ word+"\".",
        "output_format": {
            "ENG":{
                Word: word,
                Soundmark:{
                    "Eng":"[English phonetic alphabet]",
                    "AmE":"[American phonetic alphabet]",
                },
                ParaphraseEng:[
                    {
                        "PartOfSpeech": "n.",
                        "ParaphraseList": [
                            "English Explanation(Noun)1.","English Explanation(Noun)2..."
                        ]
                    }
                ],
                Paraphrase:[
                    {
                        "PartOfSpeech": "n.",
                        "ParaphraseList": [
                            "中文翻译1;","中文翻译2..."
                        ]
                    }
                ],
                ExamplesSentences:[
                    ["ExamplesSentences1","中文翻译1"],
                    ["ExamplesSentences2","中文翻译2"],
                ]
            }
        }
    });

    // The previous code first assigned "llama-4-maverick-17b-128e-instruct" and
    // immediately overwrote it; this is the only provider that was ever used.
    const aiProvider="ali-qwen-plus-2025-07-14";

    // Generate the result and parse it; a non-JSON reply still throws so the caller notices.
    const rawReply = await aiController.generateArticle(prompt, aiProvider);
    const result = JSON.parse(rawReply);

    // The model may omit ENG or Soundmark entirely; only decorate what is actually there.
    const eng = result && result.ENG;
    if (eng && eng.Soundmark){
        // Phrases contain spaces/apostrophes, so the text must be percent-encoded.
        // Fall back to the requested word when the model did not echo it back.
        const spokenText = encodeURIComponent(eng.Word || word);
        eng.Soundmark.EngSound="https://sp0.baidu.com/-rM1hT4a2gU2pMbgoY3K/gettts?lan=uk&text="+spokenText+"&spd=3&source=alading";
        eng.Soundmark.AmESound="https://sp0.baidu.com/-rM1hT4a2gU2pMbgoY3K/gettts?lan=en&text="+spokenText+"&spd=3&source=alading";
    }

    return result;
}
388
+
389
+//对古诗词网页的解析
390
/**
 * Parse the HTML of a classical-poem detail page into a plain object.
 *
 * The page is scanned with plain string searches (no DOM parser). Markers used:
 * `poem-detail-item` / `<h1>` for the title, `poem-info-gray` labels for author
 * and dynasty, `poem-detail-main-text` blocks for the verses and `means_p`
 * paragraphs (or the `means-fold` block) for the translation.
 *
 * @param {string} data - Raw HTML of the poem page.
 * @returns {{ShiciTitle: string, Author?: string, Dynasty?: string, PeomContent: Array, Translation: Array}}
 *   `PeomContent` is either `[string]` (page uses `<div class="poem-detail-main-text">`,
 *   lines joined with "\n") or an array of paragraphs, each an array of verse strings.
 *   `Translation` mirrors the paragraph layout when per-verse translations exist,
 *   otherwise holds at most one string taken from the `means-fold` block.
 */
function getPeom(data){
    const BODY_DIV_OPEN="<div class=\"poem-detail-main-text\">";
    const BODY_P_OPEN="<p class=\"poem-detail-main-text";
    const MEANS_P_OPEN="<p id=\"means_p\" class=\"poem-detail-main-text body-means-p\">";
    const DIV_CLOSE="</div>";

    let resultObject={};

    // Title: text of the first <h1> after the "poem-detail-item" marker.
    let title = data.substr(data.indexOf("poem-detail-item"));
    title = title.substr(title.indexOf("<h1>")+4);
    title = title.substr(0,title.indexOf("</h1>"));
    resultObject.ShiciTitle=title;

    // Author / dynasty: each "poem-info-gray" span holds a label, its value follows up to the line break.
    let result = data.substr(data.indexOf("poem-detail-header-author"));
    let nextIndex;
    let loopStartedAt=Date.now();
    do {
        result = result.substr(result.indexOf("poem-info-gray"));
        result = result.substr(result.indexOf(">"));
        let label=result.substr(1,result.indexOf("</span>")-1);

        let value = result.substring(result.indexOf("span>") + 5, result.indexOf("\n"));
        if (value) {
            if (label==="【作者】")
                resultObject.Author=remove(value);
            else if (label==="【朝代】")
                resultObject.Dynasty=remove(value);
        }

        nextIndex = result.indexOf("poem-info-gray");

        // loopBreaker presumably throws once the loop has run too long — TODO confirm in stringUtils.
        try {
            stringUtils.loopBreaker(loopStartedAt);
        }
        catch(err){
            break;
        }
    }
    while (nextIndex>0);

    let content=[],translation=[];
    result = data.substr(data.indexOf("poem-detail-item-content"));

    if (result.indexOf(BODY_DIV_OPEN)>=0){
        // Single-block layout: every line sits in its own <div class="poem-detail-main-text">.
        let lines=[];
        loopStartedAt=Date.now();
        do {
            result = result.substring(result.indexOf(BODY_DIV_OPEN) + BODY_DIV_OPEN.length);
            lines.push(result.substring(0, result.indexOf(DIV_CLOSE)));
            result = result.substring(result.indexOf(DIV_CLOSE) + DIV_CLOSE.length);
            nextIndex = result.indexOf(BODY_DIV_OPEN);

            try {
                stringUtils.loopBreaker(loopStartedAt);
            }
            catch(err){
                break;
            }
        }
        // >= 0: the next <div> may start immediately after the previous </div>.
        while (nextIndex>=0);
        // remove() joins the lines with "\n", so this layout yields a single string.
        content.push(remove(lines));
    }
    else{
        // Verse paragraphs: <p class="poem-detail-main-text..."> containing <span id="body_N"> segments.
        let rest=result;
        loopStartedAt=Date.now();
        nextIndex=rest.indexOf(BODY_P_OPEN);
        // Only enter when a paragraph really exists; otherwise unrelated markup would be parsed as verses.
        while (nextIndex>=0) {
            rest = rest.substring(nextIndex);
            rest = rest.substring(rest.indexOf(">")+1);
            let paragraph=rest.substring(0,rest.indexOf("</p>")+4);
            paragraph=stringUtils.ReplaceAllString(paragraph,"\n                    ","");

            content.push(extractSegments(paragraph,"<span id=\"body_","'>"));

            nextIndex = rest.indexOf(BODY_P_OPEN);

            try {
                stringUtils.loopBreaker(loopStartedAt);
            }
            catch(err){
                break;
            }
        }

        // Per-verse translations: <p id="means_p" ...> containing <span id="means_N"> segments.
        rest=result;
        loopStartedAt=Date.now();
        nextIndex=rest.indexOf(MEANS_P_OPEN);
        // Continue on the same marker the body consumes, so every pass makes progress
        // (testing for the verse marker here could spin until the loop breaker fired).
        while (nextIndex>=0) {
            rest = rest.substring(nextIndex);
            rest = rest.substring(rest.indexOf(">") + 1);
            let paragraph = rest.substring(0, rest.indexOf("</p>") + 4);
            paragraph = stringUtils.ReplaceAllString(paragraph, "\n                    ", "");

            translation.push(extractSegments(paragraph,"<span id=\"means_",">"));

            nextIndex = rest.indexOf(MEANS_P_OPEN);

            try {
                stringUtils.loopBreaker(loopStartedAt);
            }
            catch(err){
                break;
            }
        }
    }

    resultObject.PeomContent=content;

    resultObject.Translation=[];
    if (translation.length>0){
        resultObject.Translation=translation;
    }
    else {
        // No per-verse translations: fall back to the folded whole-poem translation block.
        result = data.substr(data.indexOf("poem-detail-item-content means-fold"));
        result = result.substr(result.indexOf(">"));
        const wholeTranslation=remove(result.substr(1, result.indexOf(DIV_CLOSE) - 1));
        if (wholeTranslation)
            resultObject.Translation.push(wholeTranslation);
    }

    return resultObject;

    // Split one paragraph into its id-tagged <span> segments and strip the remaining markup.
    // `tagTerminator` is the text that ends a nested <span ...> opening tag in this kind of paragraph.
    function extractSegments(paragraph,splitMarker,tagTerminator){
        const segments=[];
        const pieces=paragraph.split(splitMarker);
        for(let i=0;i<pieces.length;i++){
            if (!pieces[i])
                continue;

            // Drop the rest of the opening tag (id value and attributes).
            let piece=pieces[i].substring(pieces[i].indexOf(">")+1);
            piece=stringUtils.ReplaceAllString(piece,"</span>                </p>","");
            piece=stringUtils.ReplaceAllString(piece,"</span>","");
            // The paragraph's closing tag survives above unless the indentation matches exactly.
            piece=stringUtils.ReplaceAllString(piece,"</p>","");

            // Remove any nested <span ...> opening tags one at a time.
            const startedAt=Date.now();
            while(piece.indexOf("<span")>=0){
                let openTag=piece.substring(piece.indexOf("<span"));
                openTag=openTag.substring(0,openTag.indexOf(tagTerminator)+tagTerminator.length);

                piece=piece.replace(openTag,"");
                try {
                    stringUtils.loopBreaker(startedAt);
                }
                catch(err){
                    break;
                }
            }

            segments.push(remove(piece));
        }
        return segments;
    }

    // Normalise a fragment: arrays are joined with "\n", </span> is dropped, <br> becomes "\n", then trimmed.
    function remove(str){
        if (stringUtils.IsArray(str)){
            str=str.join("\n");
        }
        str = stringUtils.ReplaceAllString(str,"</span>","");
        str = stringUtils.ReplaceAllString(str,"<br>","\n");
        str = stringUtils.ReplaceAllString(str,"</br>","\n");
        str = stringUtils.Trim(str);
        return str;
    }
}
607
+

+ 1 - 1
src/model/miaoguo.js

@@ -9,7 +9,7 @@ class Miaoguo {
9 9
     static async GetLiteracyItem(obj) {
10 10
         try {
11 11
             if (obj.ShiciUrl) {
12
-                sql="select * from MiaoguoLiteracy where Word=? and SearchType=? and Author=? and ShiciUrl=? order by ID;";
12
+                let sql="select * from MiaoguoLiteracy where Word=? and SearchType=? and Author=? and ShiciUrl=? order by ID;";
13 13
                 let param=[obj.Word,obj.SearchType,obj.Author,obj.ShiciUrl];
14 14
                 return await query(sql, param);
15 15
             }

+ 33 - 17
src/test/build.test41.js

@@ -8,7 +8,7 @@ async function runScript(){
8 8
     try {
9 9
         
10 10
         //按照高频单词的使用频率排序,列出所有单词
11
-        const sql="select * from MiaoguoLiteracy where ID>0 order by ID desc limit 100;";
11
+        const sql="select * from MiaoguoLiteracy where ID<=114758 order by ID desc;";
12 12
         
13 13
         let list = await commonModel.RunSql(null,sql);
14 14
         let count=list.length;
@@ -18,33 +18,49 @@ async function runScript(){
18 18
         
19 19
         for(let i=start;i<count;i++){
20 20
             let item=list[i];
21
-            let json=JSON.parse(item.JSONString);
22 21
             let b=false;
23
-            if (json.ENG){
24
-                if (!stringUtils.IsArray(json.ENG.Paraphrase)){
25
-                    json.ENG.Paraphrase=[json.ENG.Paraphrase];
26
-                    console.log(json.ENG.Paraphras);
27
-                    b=true;
28
-                }
29
-                if (!stringUtils.IsArray(json.ENG.Paraphrase[0].ParaphraseList)){
30
-                    json.ENG.Paraphrase[0].ParaphraseList=[json.ENG.Paraphrase[0].ParaphraseList];
31
-                    console.log(json.ENG.Paraphrase[0].ParaphraseList);
32
-                    b=true;
22
+            let json;
23
+            try{
24
+                json=JSON.parse(item.JSONString);
25
+                
26
+                if (json.ENG){
27
+                    if (json.ENG.Paraphrase && !stringUtils.IsArray(json.ENG.Paraphrase)){
28
+                        json.ENG.Paraphrase=[json.ENG.Paraphrase];
29
+                        //console.log(json.ENG.Paraphras);
30
+                        b=true;
31
+
32
+                        if (json.ENG.Paraphrase[0].PartOfSpeech=="")
33
+                            json.ENG.Paraphrase[0].PartOfSpeech="释义";
34
+                        
35
+
36
+                        if (json.ENG.Paraphrase[0].ParaphraseList && !stringUtils.IsArray(json.ENG.Paraphrase[0].ParaphraseList)){
37
+                            json.ENG.Paraphrase[0].ParaphraseList=[json.ENG.Paraphrase[0].ParaphraseList];
38
+                            //console.log(json.ENG.Paraphrase[0].ParaphraseList);
39
+                            b=true;
40
+                        }
41
+                    }
42
+                    
33 43
                 }
34 44
             }
45
+            catch(err2){
46
+                console.error('Error executing script:', err2);
47
+                console.log(i+ " "+item.Word);
48
+                process.exit(1);
49
+            }
35 50
             
36 51
             if (b){
37 52
                 json=JSON.stringify(json);
38 53
                 
39 54
                 let sql3="update MiaoguoLiteracy set JSONString=? where ID="+item.ID+";";
40 55
                 //console.log(sql3);
41
-                console.log(json);
42
-                
43
-                //await commonModel.RunSql(json,sql3);
44
-                process.exit(1);
56
+                //console.log(json);
57
+                console.log( i +"/"+ list.length+" "+ item.ID);
58
+
59
+                await commonModel.RunSql(json,sql3);
60
+                //process.exit(1);
45 61
             }
46 62
             
47
-            console.log( i +"/"+ list.length+" "+ item.ID);
63
+            
48 64
         }
49 65
         
50 66
         console.log("完成");

+ 1 - 1
src/util/stringClass.js

@@ -795,7 +795,7 @@ export const stringUtils = {
795 795
     },
796 796
     //判断是否是英文
797 797
     IsEnglish: (temp) => {
798
-        const reg = new RegExp("^[A-Za-z]+(?:['-][A-Za-z]+)*$");
798
+        const reg = new RegExp("^[A-Za-z]+(?:['-][A-Za-z]+)*(?:\\s+[A-Za-z]+(?:['-][A-Za-z]+)*)*$");
799 799
         return reg.test(temp);     /*进行验证*/
800 800
     },
801 801
     //循环熔断函数