parent
90a7edc4f7
commit
a661efa35d
@ -0,0 +1,75 @@ |
||||
package cn.iocoder.yudao.server.service; |
||||
|
||||
import cn.hutool.core.util.StrUtil; |
||||
import org.springframework.stereotype.Service; |
||||
|
||||
import java.util.LinkedHashMap; |
||||
import java.util.List; |
||||
import java.util.Map; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
@Service |
||||
public class IdCardInfoService { |
||||
// 身份证号的正则表达式
|
||||
private static final Pattern ID_PATTERN = Pattern.compile("公民身份号码(\\d{17}[0-9Xx])"); |
||||
// 地址的正则表达式
|
||||
private static final Pattern ADDRESS_PATTERN = Pattern.compile("住址(.*?)公民身份号码"); |
||||
|
||||
public Map<String, Object> extractIdCardInfo(List<String> texts) { |
||||
Map<String, Object> data = new LinkedHashMap<>(); |
||||
|
||||
// 合并多页文本
|
||||
String mergedText = String.join("", texts); |
||||
extractNameWithValidation(mergedText, data); |
||||
extractIdNumber(mergedText, data); |
||||
return data; |
||||
} |
||||
|
||||
// 身份证号提取(保持原有逻辑)
|
||||
private void extractIdNumber(String mergedText, Map<String, Object> data) { |
||||
// 匹配身份证号
|
||||
Matcher idMatcher = ID_PATTERN.matcher(mergedText.replaceAll("\\s", "")); |
||||
if (idMatcher.find()) { |
||||
data.put("身份证号", idMatcher.group(1).toUpperCase()); |
||||
} |
||||
|
||||
// 匹配地址
|
||||
Matcher addressMatcher = ADDRESS_PATTERN.matcher(mergedText.replaceAll("\\s", "")); |
||||
if (addressMatcher.find()) { |
||||
data.put("住址", addressMatcher.group(1)); |
||||
} |
||||
} |
||||
/** |
||||
* 身份证姓名提取 |
||||
* @param texts |
||||
* @param data |
||||
*/ |
||||
private void extractNameWithValidation(String texts, Map<String, Object> data) { |
||||
// 获取"姓名"和"性别"之间的内容
|
||||
String nameBetween = StrUtil.subBetween(texts, "姓名", "性别"); |
||||
|
||||
if (StrUtil.isBlank(nameBetween)) { |
||||
// 情况1:中间为空时取"姓名"前的内容
|
||||
String nameBefore = StrUtil.subBefore(texts, "姓名", false); |
||||
data.put("姓名", nameBefore); |
||||
} else { |
||||
// 情况2:用正则处理"民族"或"民旅"
|
||||
String processedName = processEthnicKeyword(nameBetween); |
||||
data.put("姓名", processedName); |
||||
} |
||||
} |
||||
|
||||
// 使用正则表达式匹配关键词
|
||||
private String processEthnicKeyword(String input) { |
||||
// 正则匹配"民族"或"民旅"(兼容错别字)
|
||||
Pattern pattern = Pattern.compile("民[族旅]"); |
||||
Matcher matcher = pattern.matcher(input); |
||||
|
||||
if (matcher.find()) { |
||||
// 截取关键词之前的内容
|
||||
return input.substring(0, matcher.start()); |
||||
} |
||||
return input; // 无匹配时返回原内容
|
||||
} |
||||
} |
||||
@ -0,0 +1,62 @@ |
||||
package cn.iocoder.yudao.server.service; |
||||
|
||||
import org.springframework.stereotype.Service; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.HashMap; |
||||
import java.util.List; |
||||
import java.util.Map; |
||||
|
||||
@Service |
||||
public class SpeicalCertificateService { |
||||
|
||||
/** |
||||
* 监督检验证书 |
||||
* @param texts |
||||
* @return |
||||
*/ |
||||
public Map<String, Object> extractSpeicalCertificateInfo(List<String> texts){ |
||||
Map<String, Object> data = new HashMap<>(); |
||||
ArrayList<String> arrayList = new ArrayList<>(); |
||||
|
||||
extractManufacturerName(texts, data); |
||||
getSpeicalCertificateName(texts, data); |
||||
extractManufacturerTime(texts, data); |
||||
return data; |
||||
} |
||||
|
||||
/** |
||||
* 提取监督检验证书的制造单位名称 |
||||
* @param text |
||||
* @param data |
||||
*/ |
||||
private static void extractManufacturerName(List<String> texts, Map<String, Object> data) { |
||||
boolean hasCCTZ = texts.stream().anyMatch(t -> t.contains("长春致远新能源装备股份有限公司")); |
||||
if (hasCCTZ) { |
||||
data.put("制造单位名称","长春致远新能源装备股份有限公司"); |
||||
} |
||||
} |
||||
private static void getSpeicalCertificateName(List<String> texts, Map<String, Object> data) { |
||||
boolean hasCCTZ = texts.stream().anyMatch(t -> t.contains("长春特种设备检测研究院")); |
||||
if (hasCCTZ) { |
||||
data.put("监督检验机构名称","长春特种设备检测研究院"); |
||||
} |
||||
} |
||||
private static void extractManufacturerTime(List<String> texts, Map<String, Object> data) { |
||||
boolean hasCCTZ = texts.stream().anyMatch(t -> t.contains("长春特种设备检测研究院")); |
||||
if (hasCCTZ) { |
||||
for (int i = 0; i < texts.size(); i++) { |
||||
String current = texts.get(i); |
||||
// 提取产品批号
|
||||
if ("产品批号".equals(current) && i + 2 < texts.size()) { |
||||
data.put("产品批号",texts.get(i + 2)); |
||||
} |
||||
// 提取制造日期
|
||||
if ("制造日期".equals(current) && i + 1 < texts.size()) { |
||||
data.put("制造日期",texts.get(i + 1)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
} |
||||
} |
||||
@ -0,0 +1,167 @@ |
||||
package cn.iocoder.yudao.server.service; |
||||
|
||||
import cn.hutool.core.util.ReUtil; |
||||
import cn.hutool.core.util.StrUtil; |
||||
import org.springframework.stereotype.Service; |
||||
|
||||
import java.util.HashMap; |
||||
import java.util.List; |
||||
import java.util.Map; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
/** |
||||
* 行驶证识别类 |
||||
*/ |
||||
@Service |
||||
public class VehicleLicenseService { |
||||
public static void main(String[] args) { |
||||
String input = "中华人民共和国机动车行驶证Veltele Lleenseor tPeopiesRepubhlear China号牌导码号牌号码_冀A4647W鄂A4647W档案编号130111431982PlateNo车辆类型Yeeer-Tyie重型半挂牵引车所有人核定较人数2人石家庄畅宇汽车运输有限公司总质量住址河北省石家压市泉区上江镇韩庄村永来街香巷3号整质8870kg核定载所证Adurexs使用性质外尺寸.7400×2550×3560mm准本H2频量48000kg货运昂牌型号Midet解放牌CA4250P66M25T1E6备强制报废期止:2039-11-01河北省石家车辆调润代号NIALFWSRX9L8RIF18215庄市公安局发动机号码ON54120098检验有效期至2025年11月冀A交通管理局注册扫期检验记录RepeierDus2024-11-01发证日期2024-11-91天然气lsueDate300051111965"; |
||||
|
||||
// 正则表达式匹配“代号”后以L开头的17位字母数字组合
|
||||
Pattern pattern = Pattern.compile("代号.*?(L[A-Za-z0-9]{16})"); |
||||
Matcher matcher = pattern.matcher(input); |
||||
|
||||
if (matcher.find()) { |
||||
String code = matcher.group(1); |
||||
System.out.println("匹配到的代号: " + code); // 输出:LFWSRX9L8RIF18215
|
||||
} else { |
||||
System.out.println("未找到符合要求的代号"); |
||||
} |
||||
} |
||||
public Map<String, Object> extractVehicleLicenseInfo(List<String> texts) { |
||||
Map<String, Object> data = new HashMap<>(); |
||||
String text = String.join("", texts) |
||||
.replaceAll("2925-", "2025-") |
||||
.replaceAll("\\.", ""); |
||||
data.put("号牌号码", extractPlateNo(text)); // 冀A4336E
|
||||
data.put("车辆识别代码", extractVin(text)); // LFWSRX9L2RIF17688
|
||||
data.put("住址", extractAddress(text)); // 河北省石家庄市鹿泉区上庄镇韩庄村永乐街芳香巷3号
|
||||
data.put("发证日期", extractIssueDate(text)); // 2024-11-01
|
||||
|
||||
return data; |
||||
} |
||||
private static String extractVin(String text) { |
||||
// 正则表达式直接匹配以 L 开头的 17 位字符
|
||||
Pattern pattern = Pattern.compile("代号.*?(L[A-Za-z0-9]{16})"); |
||||
Matcher matcher = pattern.matcher(text); |
||||
|
||||
if (matcher.find()) { |
||||
String code = matcher.group(1); |
||||
System.out.println("匹配到的代号: " + code); // 输出:LFWSRX9L8RIF18215
|
||||
return code; |
||||
} else { |
||||
System.out.println("未找到符合要求的代号"); |
||||
} |
||||
return null; |
||||
} |
||||
|
||||
private static String extractPlateNo(String text) { |
||||
// 综合正则表达式(覆盖所有常见车牌类型)
|
||||
String regex = |
||||
"(?<![0-9A-Z])" // 前导边界控制
|
||||
+ "([" |
||||
+ "京津沪渝冀晋辽吉黑苏浙皖闽赣鲁豫鄂湘粤琼川贵云陕甘青台蒙藏桂宁新港澳使领学警" // 省份简称白名单
|
||||
+ "])" |
||||
+ "(" |
||||
+ "([A-HJ-NP-ZDF](?:·?[0-9A-HJ-NP-Z]{5,6})" // 新能源/普通车牌
|
||||
+ "|([A-HJ-NP-Z][0-9A-HJ-NP-Z]{4}[挂学警港澳])" // 普通车牌
|
||||
+ "|(Z·[0-9A-HJ-NP-Z]{4,5})" // 港澳车牌
|
||||
+ "|([使领]\\d{6})" // 使馆车牌
|
||||
+ ")" |
||||
+ "(?![0-9A-Z]))"; // 后续边界控制
|
||||
|
||||
// 执行匹配
|
||||
Matcher matcher = Pattern.compile(regex).matcher(text); |
||||
if (matcher.find()) { |
||||
// 清洗分隔符并返回
|
||||
return matcher.group().replaceAll("[·\\s]", ""); |
||||
} |
||||
return null; |
||||
} |
||||
|
||||
|
||||
|
||||
|
||||
|
||||
private static String extractAddress(String text) { |
||||
// 精确地址提取(到门牌号)
|
||||
return ReUtil.getGroup1( |
||||
"住址([\u4e00-\u9fa5]+?省[\u4e00-\u9fa5]+?市.*?\\d+号)", |
||||
text); |
||||
} |
||||
|
||||
private static String extractIssueDate(String text) { |
||||
// 主正则:严格匹配标准格式
|
||||
String primaryRegex = "(?:发证日期|IsueDate|ReastrDite)[^\\d]{0,5}(20\\d{2}[-=年]?\\d{1,2}[-=月]?\\d{1,2})"; |
||||
|
||||
// 备用正则:精确匹配紧凑格式
|
||||
String fallbackRegex = "(?i)(?:发证日期|IsueDate|ReastrDite)[^\\d]{0,5}(20\\d{2})-?(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])"; |
||||
|
||||
// 优先尝试主正则匹配
|
||||
String rawDate = ReUtil.get(primaryRegex, text, 1); |
||||
|
||||
// 主正则匹配失败时尝试备用正则
|
||||
if (StrUtil.isBlank(rawDate)) { |
||||
Matcher fallbackMatcher = Pattern.compile(fallbackRegex).matcher(text); |
||||
if (fallbackMatcher.find()) { |
||||
rawDate = String.format("%s-%s-%s", |
||||
fallbackMatcher.group(1), |
||||
fallbackMatcher.group(2), |
||||
fallbackMatcher.group(3)); |
||||
} |
||||
} |
||||
|
||||
// 兜底方案:精准倒序扫描
|
||||
if (StrUtil.isBlank(rawDate)) { |
||||
// 增强型正则(匹配日期核心部分)
|
||||
Pattern fallbackPattern = Pattern.compile( |
||||
"(20\\d{2}[-/年.]?\\d{2}[-/月.]?\\d{2})|(20\\d{6})" |
||||
); |
||||
|
||||
// 获取所有候选并倒序筛选
|
||||
List<String> candidates = ReUtil.findAll(fallbackPattern, text, 0); |
||||
for (int i = candidates.size()-1; i >=0 ; i--) { |
||||
String candidate = candidates.get(i); |
||||
|
||||
// 清洗并验证日期有效性
|
||||
String cleaned = candidate.replaceAll("[^0-9]", ""); |
||||
if (cleaned.length() != 8) { |
||||
continue; |
||||
} |
||||
|
||||
try { |
||||
int year = Integer.parseInt(cleaned.substring(0,4)); |
||||
int month = Integer.parseInt(cleaned.substring(4,6)); |
||||
int day = Integer.parseInt(cleaned.substring(6,8)); |
||||
|
||||
if (year < 2000 || year > 2099) { |
||||
continue; |
||||
} |
||||
if (month < 1 || month > 12) { |
||||
continue; |
||||
} |
||||
if (day < 1 || day > 31) { |
||||
continue; |
||||
} |
||||
|
||||
// 找到第一个有效日期立即返回
|
||||
rawDate = String.format("%04d-%02d-%02d", year, month, day); |
||||
break; |
||||
} catch (Exception e) { |
||||
continue; |
||||
} |
||||
} |
||||
} |
||||
|
||||
// 最终校验
|
||||
if (StrUtil.isNotBlank(rawDate)) { |
||||
// 排除闰年等复杂校验(按业务需求可扩展)
|
||||
return rawDate; |
||||
} |
||||
return null; |
||||
} |
||||
|
||||
|
||||
|
||||
} |
||||
Loading…
Reference in new issue