// ocr.js
  1. import common from '../../utils/util';
  2. import main from '../../utils/main';
  // Base URL of the OCR endpoint; the encrypted action name is appended to it.
  const OCR_API_BASE = 'https://www.kylx365.com/apiData/';

  // Maximum number of words that may be selected at the same time.
  const MAX_SELECTED_WORDS = 10;

  /** Returns true when a word entry ({ Word, Selected }) is currently selected. */
  function isSelected(item) {
    return Number(item.Selected) === 1;
  }

  /** Box used when a detection carries no usable coordinates. */
  function defaultBox() {
    return { x: 50, y: 50, width: 200, height: 30 };
  }

  /**
   * Shows the chosen image on the page and starts recognition on it.
   * Shared by the camera and album entry points.
   */
  function showImageAndRecognize(page, imagePath) {
    page.setData({
      imageUrl: imagePath,
      cameraActive: false,
      showCanvas: true,
    });
    // performOCR handles its own errors, so the returned promise is not awaited.
    page.performOCR(imagePath);
  }

  /**
   * OCR page: takes or picks a photo, sends it to the OCR endpoint, extracts
   * the English words from the recognized text and lets the user select up to
   * MAX_SELECTED_WORDS of them before moving on to the article page.
   */
  Page({
    data: {
      cameraActive: true,
      imageUrl: '',
      recognizedTexts: [],
      showCanvas: false,
      Count: 0,
    },

    /** Initializes the extracted word list (null until an OCR run succeeds). */
    onLoad() {
      this.setData({
        englishWords: null,
      });
    },

    /**
     * Extracts the distinct English words found in the recognized text items.
     *
     * A word is two or more ASCII letters, optionally followed by groups joined
     * with an apostrophe or hyphen (e.g. "don't", "well-known"). It must not be
     * glued to another Latin letter or a digit, so punctuation, whitespace and
     * CJK characters all act as boundaries.
     *
     * @param {Array<{text: string}>} texts - recognized text items
     * @returns {string[]} lower-cased unique words in order of first appearance
     */
    extractEnglishWords(texts) {
      const wordRegex = /(?:^|[^A-Za-z0-9\u00C0-\u024F])([A-Za-z]{2,}(?:['’-][A-Za-z]+)*)(?=$|[^A-Za-z0-9\u00C0-\u024F])/g;
      const words = new Set();
      console.group('英语单词提取');
      (Array.isArray(texts) ? texts : []).forEach((item) => {
        const text = item && item.text;
        // exec() coerces its argument, so a missing text would otherwise be
        // scanned as the literal string "undefined".
        if (typeof text !== 'string') {
          return;
        }
        // The regex is global (stateful); restart it for every new string.
        wordRegex.lastIndex = 0;
        let match = wordRegex.exec(text);
        while (match !== null) {
          words.add(match[1].toLowerCase());
          match = wordRegex.exec(text);
        }
      });
      const result = Array.from(words);
      console.log('提取结果:', result);
      console.groupEnd();
      return result;
    },

    /** Takes a photo with the camera component and runs OCR on it. */
    takePhoto() {
      const ctx = wx.createCameraContext();
      ctx.takePhoto({
        quality: 'high',
        success: (res) => {
          showImageAndRecognize(this, res.tempImagePath);
        },
        fail: (err) => {
          console.error('拍照失败:', err);
          wx.showToast({
            title: '拍照失败,请重试',
            icon: 'none',
          });
        },
      });
    },

    /** Lets the user pick one image from the album and runs OCR on it. */
    chooseImage() {
      wx.chooseMedia({
        count: 1,
        mediaType: ['image'],
        sourceType: ['album'],
        success: (res) => {
          showImageAndRecognize(this, res.tempFiles[0].tempFilePath);
        },
        fail: (err) => {
          console.error('选择图片失败:', err);
          wx.showToast({
            title: '选择图片失败',
            icon: 'none',
          });
        },
      });
    },

    /**
     * Compresses the image, uploads it as base64 to the OCR endpoint and stores
     * the recognized texts and extracted English words in page data.
     * Never rejects: failures are logged and reported to the user with a toast.
     *
     * @param {string} imagePath - local path of the image to recognize
     * @returns {Promise<void>}
     */
    async performOCR(imagePath) {
      if (!imagePath) {
        console.error('图片路径无效');
        return;
      }
      wx.showLoading({ title: '识别中...', mask: true });
      let failure = null;
      try {
        // 1. Compress; on failure fall back to the original file (best effort).
        const compressed = await new Promise((resolve) => {
          wx.compressImage({
            src: imagePath,
            quality: 70,
            success: resolve,
            fail: () => resolve({ tempFilePath: imagePath }),
          });
        });

        // 2. Read the file as base64.
        const fileRes = await new Promise((resolve, reject) => {
          wx.getFileSystemManager().readFile({
            filePath: compressed.tempFilePath,
            encoding: 'base64',
            success: resolve,
            fail: reject,
          });
        });

        // 3. Call the OCR endpoint.
        const cloudRes = await new Promise((resolve, reject) => {
          wx.request({
            url: OCR_API_BASE + common.Encrypt('OCRImageData'),
            method: 'POST',
            data: { ImageBase64: `data:image/jpeg;base64,${fileRes.data}` },
            success: resolve,
            fail: reject,
          });
        });

        // 4. Validate the response before touching nested fields.
        const result = cloudRes && cloudRes.data && cloudRes.data.result;
        if (!result) {
          throw new Error('无效的响应格式');
        }
        if (!Array.isArray(result.TextDetections)) {
          throw new Error(result.message || '识别服务返回空数据');
        }

        // 5. Keep only detections that actually contain text.
        const texts = result.TextDetections
          .filter((item) => item && item.DetectedText)
          .map((item) => ({
            text: item.DetectedText,
            pos: this.convertPosition(item.ItemPolygon || item.Polygon || { Points: [] }),
          }));
        if (texts.length === 0) {
          throw new Error('未识别到有效文字');
        }

        // 6. Extract the English words; a fresh list starts with nothing selected.
        const englishWords = this.extractEnglishWords(texts).map((word) => ({
          Word: word,
          Selected: 0,
        }));
        this.setData({
          recognizedTexts: texts,
          englishWords,
          Count: 0,
        });
      } catch (err) {
        failure = err || {};
        console.error('OCR处理失败:', err);
      } finally {
        wx.hideLoading();
      }
      // Shown only after hideLoading: loading and toast share one overlay, so
      // hiding the loading indicator afterwards could dismiss the toast.
      if (failure) {
        wx.showToast({
          title: failure.message || '识别失败',
          icon: 'none',
          duration: 3000,
        });
      }
    },

    /**
     * Converts detection coordinates into an { x, y, width, height } box.
     *
     * Accepts a list of corner points (either a bare array or `{ Points }`,
     * each point being `{ X, Y }`) or a rectangle `{ X, Y, Width, Height }`.
     * NOTE(review): the rectangle form is assumed to be what the service sends
     * as ItemPolygon — confirm against the backend response.
     *
     * @param {Object|Array} polygon - coordinates of one detection
     * @returns {{x: number, y: number, width: number, height: number}}
     *   bounding box, or a default box when no coordinates are usable
     */
    convertPosition(polygon) {
      if (!polygon) {
        return defaultBox();
      }
      const points = Array.isArray(polygon) ? polygon : polygon.Points;
      if (Array.isArray(points) && points.length > 0) {
        const xs = points.map((p) => (p && p.X) || 0);
        const ys = points.map((p) => (p && p.Y) || 0);
        const left = Math.min(...xs);
        const top = Math.min(...ys);
        return {
          x: left,
          y: top,
          width: Math.max(...xs) - left,
          height: Math.max(...ys) - top,
        };
      }
      const rect = [polygon.X, polygon.Y, polygon.Width, polygon.Height];
      if (rect.every(Number.isFinite)) {
        return {
          x: polygon.X,
          y: polygon.Y,
          width: polygon.Width,
          height: polygon.Height,
        };
      }
      return defaultBox();
    },

    /** Returns to the camera and clears everything left from the last image. */
    retake() {
      this.setData({
        cameraActive: true,
        imageUrl: '',
        recognizedTexts: [],
        showCanvas: false,
        englishWords: null,
        Count: 0,
      });
    },

    /**
     * Toggles the selection of the tapped word and refreshes the counter.
     * Selecting is refused (with a toast) once MAX_SELECTED_WORDS words are
     * already selected; deselecting is always allowed.
     *
     * @param {Object} e - tap event; `e.currentTarget.dataset.text` is the word
     *   entry (`{ Word }`) or the word string itself
     */
    selectWord(e) {
      const target = e.currentTarget.dataset.text;
      if (!target) {
        return;
      }
      const current = this.data.englishWords;
      if (!Array.isArray(current)) {
        return;
      }
      const word = typeof target === 'string' ? target : target.Word;
      const selectedBefore = current.filter(isSelected).length;
      let limitReached = false;
      // Build a new list instead of mutating this.data in place.
      const updated = current.map((item) => {
        if (item.Word !== word) {
          return item;
        }
        if (isSelected(item)) {
          return Object.assign({}, item, { Selected: 0 });
        }
        if (selectedBefore >= MAX_SELECTED_WORDS) {
          limitReached = true;
          return item;
        }
        return Object.assign({}, item, { Selected: 1 });
      });
      if (limitReached) {
        wx.showToast({
          title: `最多${MAX_SELECTED_WORDS}个单词`,
          icon: 'none',
        });
      }
      this.setData({
        englishWords: updated,
        Count: updated.filter(isSelected).length,
      });
    },

    /** Opens the article page, passing the selected words as a comma list. */
    goToArticleGenerator() {
      const list = this.data.englishWords || [];
      const words = list.filter(isSelected).map((item) => item.Word);
      wx.redirectTo({
        url: `/pages/index/index?words=${words.join(',')}`,
      });
    },
  });