|
|
|
|
@ -8,12 +8,7 @@ import java.util.regex.Pattern; |
|
|
|
|
|
|
|
|
|
@Service |
|
|
|
|
public class UnifiedSocialCreditService { |
|
|
|
|
/**
 * Lookup set of Chinese surnames (single-character and compound) consulted by
 * {@code isValidName}. Insertion order is retained by the backing
 * {@link LinkedHashSet}.
 *
 * NOTE(review): the original comment stated the set holds 504 surnames; that
 * figure is not verified against the literals in {@code initSurnames()}.
 */
private static final Set<String> SURNAMES = new LinkedHashSet<>(650);

static {
    // Populate the surname data once, when the class is initialised.
    initSurnames();
}
|
|
|
|
|
|
|
|
|
// 省级行政区划全称(包含省、自治区、直辖市、特别行政区)
|
|
|
|
|
private static final String[] PROVINCES = { |
|
|
|
|
"北京市", "天津市", "河北省", "山西省", "内蒙古自治区", "辽宁省", "吉林省", "黑龙江省", |
|
|
|
|
@ -39,47 +34,6 @@ public class UnifiedSocialCreditService { |
|
|
|
|
"(?=\\s|扫描二维码|成立日期|注册资本|http|$)" // 精准终止符
|
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
private static void initSurnames() { |
|
|
|
|
// 单姓列表(444个)
|
|
|
|
|
SURNAMES.addAll(Arrays.asList( |
|
|
|
|
"王","李","张","刘","陈","杨","黄","赵","周","吴","徐","孙","马","朱","胡","林","郭","何","高","罗", |
|
|
|
|
"郑","梁","谢","宋","唐","许","邓","冯","韩","曹","曾","彭","萧","蔡","潘","田","董","袁","于","余", |
|
|
|
|
"叶","蒋","杜","苏","魏","程","吕","丁","沈","任","姚","卢","傅","钟","姜","崔","谭","廖","范","汪", |
|
|
|
|
"陆","金","石","戴","贾","韦","夏","邱","方","侯","邹","熊","孟","秦","白","江","阎","薛","尹","段", |
|
|
|
|
"雷","黎","史","龙","陶","贺","顾","毛","郝","龚","邵","万","钱","严","赖","覃","洪","武","莫","孔", |
|
|
|
|
"向","常","汤","文","牛","樊","葛","邢","安","齐","易","乔","伍","庞","颜","倪","庄","聂","章","鲁", |
|
|
|
|
"岳","翟","殷","詹","申","欧","耿","关","兰","焦","俞","左","柳","甘","祝","包","宁","尚","符","舒", |
|
|
|
|
"阮","柯","纪","梅","童","凌","毕","季","裴","霍","涂","成","苗","谷","盛","曲","翁","冉","骆","蓝", |
|
|
|
|
"路","游","辛","靳","管","柴","蒙","鲍","华","喻","祁","蒲","房","滕","屈","饶","解","牟","艾","尤", |
|
|
|
|
"阳","时","穆","农","司","古","吉","缪","简","车","项","连","芦","麦","褚","娄","窦","戚","岑","党", |
|
|
|
|
"宫","费","卜","冷","晏","席","卫","米","柏","宗","邬","瞿","商","谈","靳","邰","姬","申","扶","堵", |
|
|
|
|
"冉","宰","雍","郤","璩","桑","桂","濮","牛","寿","通","边","扈","燕","冀","郏","浦","尚","农", |
|
|
|
|
"温","别","庄","晏","柴","瞿","阎","充","慕","连","茹","习","宦","艾","鱼","容","向","戈","庾","暨", |
|
|
|
|
"居","衡","步","都","耿","满","弘","匡","国","文","寇","广","禄","阙","东","欧","殳","沃","利","蔚", |
|
|
|
|
"越","夔","隆","师","巩","厍","聂","晁","勾","敖","融","冷","訾","辛","阚","那","简","饶","空", |
|
|
|
|
"曾","毋","沙","乜","养","鞠","须","丰","巢","关","蒯","相","查","后","荆","红","游","竺","权","逯", |
|
|
|
|
"盖","益","桓","公","万俟","司马","上官","欧阳" |
|
|
|
|
)); |
|
|
|
|
|
|
|
|
|
// 复姓列表(60个)及前缀处理
|
|
|
|
|
SURNAMES.addAll(Arrays.asList( |
|
|
|
|
"欧阳","上官","皇甫","令狐","诸葛","司马","宇文","尉迟","慕容","闾丘", |
|
|
|
|
"公羊","澹台","公冶","宗政","濮阳","申屠","公孙","仲孙","轩辕","鲜于", |
|
|
|
|
"钟离","长孙","端木","拓跋","东郭","呼延","羊舌","万俟","南宫","西门", |
|
|
|
|
"亓官","司寇","颛孙","子车","巫马","壤驷","漆雕","乐正","宰父","谷梁", |
|
|
|
|
"段干","梁丘","东门","公西","微生","公户","公玉","公仪","仲长","叔孙", |
|
|
|
|
"屈突","尔朱","斛斯","轩辕","赫连","长孙" |
|
|
|
|
)); |
|
|
|
|
|
|
|
|
|
// 添加复姓前缀以增强匹配
|
|
|
|
|
SURNAMES.addAll(Arrays.asList( |
|
|
|
|
"欧","上","皇","令","诸","司","宇","尉","慕","闾", |
|
|
|
|
"公","澹","宗","濮","申","孙","仲","轩","鲜","钟", |
|
|
|
|
"长","端","拓","东","呼","羊","万","南","西","亓", |
|
|
|
|
"颛","子","巫","壤","漆","乐","宰","谷","段","梁", |
|
|
|
|
"微","叔","屈","尔","斛","赫" |
|
|
|
|
)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// 公司名称正则(增强版)
|
|
|
|
|
private static final Pattern COMPANY_PATTERN = Pattern.compile( |
|
|
|
|
@ -109,7 +63,7 @@ public class UnifiedSocialCreditService { |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
String name = company.replaceAll("^.*称", ""); |
|
|
|
|
data.put("企业名称", name); |
|
|
|
|
data.put("userUnit", name); |
|
|
|
|
return ; |
|
|
|
|
} |
|
|
|
|
// 兜底匹配:直接查找XX有限公司
|
|
|
|
|
@ -122,7 +76,7 @@ public class UnifiedSocialCreditService { |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
String name = findStr.replaceAll("^.*称", ""); |
|
|
|
|
data.put("企业名称", name); |
|
|
|
|
data.put("userUnit", name); |
|
|
|
|
} |
|
|
|
|
return ; |
|
|
|
|
} |
|
|
|
|
@ -147,7 +101,7 @@ public class UnifiedSocialCreditService { |
|
|
|
|
System.out.println("DEBUG[匹配候选]:" + code); // 调试输出
|
|
|
|
|
|
|
|
|
|
if (isValidCode(code)) { |
|
|
|
|
data.put("统一社会信用代码", code); |
|
|
|
|
data.put("unifiedSocialCode", code); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
@ -172,7 +126,7 @@ public class UnifiedSocialCreditService { |
|
|
|
|
.replaceAll("([号路])(\\d)", "$1$2"); // 修复门牌号格式
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data.put("住所", address); |
|
|
|
|
data.put("userAddress", address); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
// 兜底匹配逻辑(省级定位)
|
|
|
|
|
@ -191,45 +145,9 @@ public class UnifiedSocialCreditService { |
|
|
|
|
.replaceAll("经营.*器):", "") |
|
|
|
|
.replaceAll("住", "") |
|
|
|
|
.replaceAll("[^\\u4e00-\\u9fa5\\d]", ""); |
|
|
|
|
data.put("住所", address); |
|
|
|
|
data.put("userAddress", address); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 辅助方法:检查相邻行
|
|
|
|
|
private boolean checkAdjacentLine(List<String> texts, int index, Map<String, Object> data) { |
|
|
|
|
if (index >= 0 && index < texts.size()) { |
|
|
|
|
String line = texts.get(index).trim(); |
|
|
|
|
if (isValidName(line)) { |
|
|
|
|
data.put("姓名", line); |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// 增强版姓名验证(包含复姓处理)
|
|
|
|
|
private boolean isValidName(String candidate) { |
|
|
|
|
// 基础验证
|
|
|
|
|
if (candidate == null || candidate.isEmpty()) { |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
String cleaned = candidate.replaceAll("[^\\u4e00-\\u9fa5]", ""); |
|
|
|
|
if (cleaned.length() < 2 || cleaned.length() > 4) { |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// 复姓优先验证(2-3字)
|
|
|
|
|
for (int len = Math.min(3, cleaned.length()); len >= 2; len--) { |
|
|
|
|
String prefix = cleaned.substring(0, len); |
|
|
|
|
if (SURNAMES.contains(prefix)) { |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// 单姓验证
|
|
|
|
|
return SURNAMES.contains(cleaned.substring(0, 1)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|