zhouc ocr识别图片

This commit is contained in:
zhouc 2025-03-05 15:05:14 +08:00
parent 0073e62c59
commit b5b24e21b2
2 changed files with 80 additions and 28 deletions

View File

@ -8,10 +8,12 @@ import com.volcengine.ark.runtime.model.completion.chat.ChatMessageRole;
import com.volcengine.ark.runtime.service.ArkService;
import com.wmyun.framework.common.pojo.CommonResult;
import com.wmyun.module.system.controller.admin.forward.vo.OCRReqVO;
import com.wmyun.module.system.controller.admin.pdf.vo.PdfResultVO;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.validation.Valid;
import jodd.util.StringUtil;
import org.json.JSONObject;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.validation.annotation.Validated;
@ -20,9 +22,7 @@ import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import static com.wmyun.framework.common.pojo.CommonResult.error;
@ -47,60 +47,112 @@ public class OCRController {
/**
 * OCR endpoint: accepts an uploaded file plus a text prompt, Base64-encodes the file and
 * forwards both to {@code getmsg}.
 *
 * <p>Mapped to POST {@code /OCRinterface} with {@code multipart/form-data}; guarded by the
 * {@code system:forward:OCRinterface} permission.
 *
 * <p>NOTE(review): the two signatures below, and the two {@code getmsg} call sites further
 * down, look like the removed and the added side of a commit shown together — confirm
 * against the actual file before relying on this listing.
 *
 * @param file uploaded file; an empty file raises {@link IllegalArgumentException}, which the
 *             outer {@code catch} turns into a 411 error result
 * @param mag  text passed to {@code getmsg} as the prompt
 * @return in the {@code CommonResult<OCRReqVO>} variant: a success result whose
 *         {@code makedown}/{@code json} carry the {@code "msg"}/{@code "msg1"} map entries when
 *         {@code getmsg} reports {@code "success"} = {@code "1"}, otherwise a 400 error built
 *         from {@code "msg"}; any exception yields a 411 error with the exception message
 */
@PostMapping(value = "/OCRinterface", consumes = "multipart/form-data")
@Operation(summary = "OCR识别")
@PreAuthorize("@ss.hasPermission('system:forward:OCRinterface')")
public CommonResult<String> OCRinterface(@RequestParam("file") MultipartFile file,
@RequestParam("mag") String mag) {
public CommonResult<OCRReqVO> OCRinterface(@RequestParam("file") MultipartFile file,
@RequestParam("mag") String mag) {
try {
String imageurl = "";
// Validate that the file is not empty
if (file.isEmpty()) {
throw new IllegalArgumentException("文件为空");
}
String contentType = file.getContentType();
// Extension = text after the last '.', lower-cased. With no '.', lastIndexOf returns -1,
// so substring(0) yields the whole file name.
// NOTE(review): getOriginalFilename() may return null per the Spring API; that would raise
// a NullPointerException here and end in the generic catch below — confirm this is acceptable.
int lastDotIndex = file.getOriginalFilename().lastIndexOf('.');
String OriginalFilename = file.getOriginalFilename().substring(lastDotIndex + 1).toLowerCase();
// Read the byte stream and Base64-encode it
try (InputStream inputStream = file.getInputStream()) {
byte[] bytes = inputStream.readAllBytes();
imageurl = Base64.getEncoder().encodeToString(bytes);
}
// Variant 1: data URL built from the content type reported with the upload; the raw
// getmsg result is returned as a string.
imageurl = "data:" + contentType + ";base64," + imageurl;
Object getmsg = getmsg(mag, imageurl);
return success(getmsg.toString());
// Variant 2: data URL built from the extension -> MIME table. For an extension that is not
// in the table, imageurl stays the bare Base64 text without a "data:" prefix.
Map<String, String> getimage = getimage();
if (getimage.containsKey(OriginalFilename)) {
imageurl = "data:" + getimage.get(OriginalFilename) + ";base64," + imageurl;
}
Map<String, Object> getmsg = getmsg(mag, imageurl);
// "success" is the string flag written by getmsg: "1" on success, "0" on ArkHttpException.
if (getmsg.get("success").equals("1")) {
OCRReqVO ocrReqVO= new OCRReqVO();
ocrReqVO.setMakedown(getmsg.get("msg"));
ocrReqVO.setJson(getmsg.get("msg1"));
return success(ocrReqVO);
} else {
return error(400, getmsg.get("msg").toString());
}
} catch (Exception exception) {
// Catch-all boundary: every failure (including the empty-file check above) is reported
// as code 411 carrying only the exception message, which may be null.
String message = exception.getMessage();
return error(411, message);
}
}
/**
 * Sends the prompt and the image to the chat-completion service in two rounds and collects
 * both replies.
 *
 * <p>Round one sends {@code mag} plus the image as a single USER message. Round two replays
 * that message, adds the first reply as a SYSTEM-role message and then a USER message with
 * the fixed template prompt {@code mag1}.
 *
 * <p>NOTE(review): this listing appears to show removed and added lines of a commit together
 * (two signatures, two request builders, two {@code return} statements) — confirm against
 * the actual file.
 *
 * @param mag      text placed in the first content part of the user message
 * @param imageurl image reference for the {@code image_url} content part; in the
 *                 {@code Map}-returning variant the part is only added when this is non-empty
 * @return in the {@code Map}-returning variant: {@code "success"} = {@code "1"} with
 *         {@code "msg"} and {@code "msg1"} holding the reply holders, or {@code "success"} =
 *         {@code "0"} with {@code "msg"} = the exception text when an
 *         {@code ArkHttpException} is caught
 */
public Object getmsg(String mag, String imageurl) {
AtomicReference<Object> msg = new AtomicReference<>("");
public Map<String, Object> getmsg(String mag, String imageurl) {
Map<String, Object> map = new HashMap<>();
// Holders written from inside the forEach lambdas below.
AtomicReference<Object> msg = new AtomicReference<>();
AtomicReference<Object> msg1 = new AtomicReference<>();
try {
// Fixed follow-up prompt for round two: asks for one more output in the given template
// (party names, amounts, bank details and a list of line items).
String mag1 = "再输出一个格式{ \"收票方名称\": \"\", \"开票方名称\": \"\", \"总金额\": \"\", \"税额\": \"\", \"不含税金额\": \"\", \"币种\": \"\", \"银行\": \"\", \"账户名称\": \"\", \"账户号码\": \"\", \"Swift代码\": \"\", \"商品货物项目明细\": [ { \"数量\": \"\", \"单位\": \"\", \"货物\": \"\", \"单价\": \"\", \"税率\": \"\", \"总价\": \"\" } ] }";
// A new service client is built on every call; API_KEY and baseUrl are declared outside
// this block.
ArkService service = ArkService.builder().apiKey(API_KEY).baseUrl(baseUrl).build();
// ArkService service = new ArkService(ARK_API_KEY);
System.out.println("----- image input -----");
final List<ChatMessage> messages = new ArrayList<>();
final List<ChatCompletionContentPart> multiParts = new ArrayList<>();
// Round one content: the text prompt first, then the image part.
multiParts.add(ChatCompletionContentPart.builder().type("text").text(mag).build());
multiParts.add(ChatCompletionContentPart.builder().type("image_url").imageUrl(
new ChatCompletionContentPart.ChatCompletionContentPartImageURL(imageurl)
).build());
// Alternative form of the same step: the image part is skipped when imageurl is empty.
if (imageurl.length() > 0) {
multiParts.add(ChatCompletionContentPart.builder().type("image_url").imageUrl(
new ChatCompletionContentPart.ChatCompletionContentPartImageURL(imageurl)).build());
}
final ChatMessage userMessage = ChatMessage.builder().role(ChatMessageRole.USER)
.multiContent(multiParts).build();
messages.add(userMessage);
ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
.model(DEEPSEEK_OCR_MODEL)
.messages(messages)
.build();
.model(DEEPSEEK_OCR_MODEL).messages(messages).build();
// Each choice overwrites msg, so the content of the last choice is what remains.
service.createChatCompletion(chatCompletionRequest).getChoices().forEach(
choice ->
msg.set(choice.getMessage().getContent()));
choice -> msg.set(choice.getMessage().getContent()));
// Second round: replay the user message, feed the first reply back, then ask for the template.
final List<ChatMessage> messages1 = new ArrayList<>();
messages1.add(userMessage);
final List<ChatCompletionContentPart> multiParts1 = new ArrayList<>();
// AtomicReference.toString() renders the held value, so this is the first reply as text.
multiParts1.add(ChatCompletionContentPart.builder().type("text").text(msg.toString()).build());
// NOTE(review): the first reply is attached with the SYSTEM role, not an assistant role —
// confirm this is intended.
final ChatMessage userMessage1 = ChatMessage.builder().role(ChatMessageRole.SYSTEM)
.multiContent(multiParts1).build();
messages1.add(userMessage1);
final List<ChatCompletionContentPart> multiParts2 = new ArrayList<>();
multiParts2.add(ChatCompletionContentPart.builder().type("text").text(mag1).build());
final ChatMessage userMessage2 = ChatMessage.builder().role(ChatMessageRole.USER)
.multiContent(multiParts2).build();
messages1.add(userMessage2);
ChatCompletionRequest chatCompletionRequest1 = ChatCompletionRequest.builder()
.model(DEEPSEEK_OCR_MODEL).messages(messages1).build();
service.createChatCompletion(chatCompletionRequest1).getChoices().forEach(
choice -> msg1.set(choice.getMessage().getContent()));
map.put("success", "1");
// NOTE(review): the AtomicReference holders themselves are stored, not msg.get() /
// msg1.get() — confirm that callers and the JSON serializer expect that.
map.put("msg", msg);
map.put("msg1", msg1);
// NOTE(review): shutdownExecutor() is only reached on the success path; when an exception
// is thrown above, the service is not shut down here.
service.shutdownExecutor();
} catch (ArkHttpException e) {
// Only ArkHttpException is handled here; any other exception propagates to the caller.
map.put("success", "0");
map.put("msg", e.toString());
System.out.print(e.toString());
}
return msg;
return map;
}
/**
 * Builds the lookup table from a lower-case file extension (without the dot) to the MIME
 * type string associated with it.
 *
 * @return a freshly created, mutable map; every call returns a new instance
 */
public Map<String, String> getimage() {
    // Extension/MIME pairs, listed in the order they are inserted into the map.
    final String[][] extensionMimePairs = {
        {"jpg", "image/jpeg"},
        {"jpeg", "image/jpeg"},
        {"png", "image/png"},
        {"apng", "image/png"},
        {"gif", "image/gif"},
        {"webp", "image/webp"},
        {"bmp", "image/bmp"},
        {"tiff", "image/tiff"},
        {"tif", "image/tiff"},
        {"ico", "image/x-icon"},
        {"dib", "image/bmp"},
        {"icns", "image/icns"},
        {"j2c", "image/jp2"},
        {"j2k", "image/jp2"},
        {"jp2", "image/jp2"},
        {"jpc", "image/jp2"},
        {"jpf", "image/jp2"},
        {"jpx", "image/jp2"},
    };
    Map<String, String> mimeByExtension = new HashMap<>();
    for (String[] pair : extensionMimePairs) {
        mimeByExtension.put(pair[0], pair[1]);
    }
    return mimeByExtension;
}
}

View File

@ -17,8 +17,8 @@ import org.springframework.web.multipart.MultipartFile;
// Value object for the admin-side OCR feature (Schema description: "管理后台 - OCR Request VO").
// NOTE(review): this listing appears to show fields from before and after a commit together
// (file/mag and makedown/json) — confirm which fields exist in the actual class.
@NoArgsConstructor
@Schema(description = "管理后台 - OCR Request VO")
public class OCRReqVO {
// Uploaded file.
@Schema(description = "文件", example = "https://XXXXXX/login")
private MultipartFile file;
// User message text.
@Schema(description = "用户消息", example = "今天天气怎么样!")
private String mag;
// Result in "makedown" form, typed as Object.
// NOTE(review): "makedown" is presumably a misspelling of "markdown"; renaming would change
// the generated accessor names and the serialized field name — verify before touching.
@Schema(description = "makedown格式", example = "")
private Object makedown;
// Result in JSON form, typed as Object.
@Schema(description = "json格式", example = "")
private Object json;
}