浏览代码

调整车辆单位匹配逻辑

weijianghai 1 月之前
父节点
当前提交
ad4387e875
共有 3 个文件被更改,包括 454 次插入和 458 次删除
  1. 148 150
      car/car-chao-bao/car_chao-bao.py
  2. 150 151
      car/car-guo-jian/car_guo_jian.py
  3. 156 157
      car/car-wei-zhang/car_wei_zhang.py

+ 148 - 150
car/car-chao-bao/car_chao-bao.py

@@ -51,6 +51,27 @@ def data_process():
         r"[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z][A-Z0-9]{4}[A-Z0-9挂学警港澳])"
     )
 
+    # 一级单位字典
+    first_unit_map = {
+        "保定市分公司": "保定",
+        "沧州市分公司": "沧州",
+        "承德市分公司": "承德",
+        "邯郸市分公司": "邯郸",
+        "河北省分公司线路维护中心": "机动局",
+        "河北省机动通信局": "机动局",
+        "衡水市分公司": "衡水",
+        "华北基地建设部": "华北基地建设部",
+        "廊坊市分公司": "廊坊",
+        "秦皇岛市分公司": "秦皇岛",
+        "省公司本部": "省公司本部",
+        "石家庄市分公司": "石家庄",
+        "唐山市分公司": "唐山",
+        "邢台市分公司": "邢台",
+        "雄安基地建设部": "雄安基地建设部",
+        "雄安新区分公司": "雄安",
+        "张家口市分公司": "张家口"
+    }
+
     # 定义二级行政区划映射表(地级市及其下属区县)
     er_ji_map = {
         "石家庄": ["鹿泉", "藁城", "栾城", "井陉矿区", "井陉", "无极", "正定", "元氏", "新乐", "晋州", "平山", "灵寿",
@@ -207,125 +228,92 @@ def data_process():
     # 应用 che_pai_fail 函数生成车牌匹配失败标记列
     df['车牌匹配失败'] = df['车牌号'].apply(che_pai_fail)
 
-    # 定义函数,用于提取一级单位
-    def get_first_unit(unit):
-        # 如果单位为空或无效,则返回空字符串
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
-        # 根据单位名称中的关键词返回对应的一级单位
-        if "机动通信局" in unit or "机动局" in unit or "传输局" in unit or "线路维护中心" in unit:
-            return "机动局"
-        if "雄安基地建设部" in unit:
-            return "雄安基地建设部"
-        if "华北基地建设部" in unit:
-            return "华北基地建设部"
-        # 遍历 er_ji_map 的键,寻找匹配的一级单位
-        for yj in er_ji_map.keys():
-            if yj in unit:
-                return yj
-        return "省公司本部"  # 默认返回省公司本部
+    # 获取一级单位
+    def get_first_unit(x):
+        raw_che_pai = x['原始车牌号']
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        if not raw_first_unit or not raw_first_unit.strip():
+            raise RuntimeError(f"一级单位为空:{raw_che_pai}")
+        if raw_first_unit in first_unit_map:
+            return first_unit_map.get(raw_first_unit)
+        raise RuntimeError(f"一级单位匹配失败:{raw_che_pai} {raw_first_unit}")
 
     # 应用 get_first_unit 函数生成一级单位列
-    df['一级单位'] = df['原始一级单位'].apply(get_first_unit)
+    df['一级单位'] = df.apply(get_first_unit, axis=1)
 
-    # 定义函数,用于提取二级单位
+    # 获取二级单位
     def get_second_unit(x):
-        # 获取一级单位和原始二级单位
         first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
-        # 如果二级单位为空或无效,则返回一级单位
-        if not unit or not unit.strip():
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        if first_unit in ["华北基地建设部", "雄安基地建设部", "省公司本部"]:
             return first_unit
-        # 如果一级单位是省公司本部,则返回省公司本部
-        if first_unit == "省公司本部":
-            return first_unit
-        # 如果一级单位是机动局,则根据单位名称进一步细化
+        if not raw_second_unit or not raw_second_unit.strip():
+            return f"{first_unit}本部"
         if first_unit == "机动局":
             for yj in er_ji_map.keys():
-                if yj in unit:
+                if yj in raw_second_unit:
                     return f"机动局{yj}"
             return "机动局本部"
-        # 根据特定城市和关键词返回对应的二级单位
         if first_unit == "石家庄":
-            if "开发区" in unit:
+            if "开发区" in raw_second_unit:
                 return "石家庄开发区"
         if first_unit == "廊坊":
-            if "开发区" in unit:
+            if "开发区" in raw_second_unit:
                 return "廊坊开发区"
         if first_unit == "邢台":
-            if "内丘" in unit:
+            if "内丘" in raw_second_unit:
                 return "内邱"
-            if "任泽" in unit:
+            if "任泽" in raw_second_unit:
                 return "任县"
         if first_unit == "唐山":
-            if "高开区" in unit:
+            if "高开区" in raw_second_unit:
                 return "唐山高开区"
-            if "滦州" in unit:
+            if "滦州" in raw_second_unit:
                 return "滦县"
-        # 根据 er_ji_map 获取二级单位
-        ejs = er_ji_map.get(first_unit)
-        if not ejs:
-            return first_unit
+        ejs = er_ji_map.get(first_unit, [])
         if first_unit == "雄安":
-            unit = unit.replace("雄安新区", "")
+            raw_second_unit = raw_second_unit.replace("雄安新区", "")
         for ej in ejs:
-            if ej in unit:
+            if ej in raw_second_unit:
                 return ej
-        return f"{first_unit}本部"  # 默认返回一级单位本部
+        return f"{first_unit}本部"
 
     # 应用 get_second_unit 函数生成二级单位列
     df['二级单位'] = df.apply(get_second_unit, axis=1)
 
-    # 定义函数,用于提取三级单位
+    # 取三级单位
     def get_third_unit(x):
-        # 获取二级单位和原始三级单位
         second_unit = str(x['二级单位']) if pd.notna(x['二级单位']) else ""
-        unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
-        # 如果三级单位为空或无效,则返回二级单位
-        if not unit or not unit.strip():
-            return second_unit
-        # 按下划线分割三级单位名称
-        a = unit.split("_")
-        if len(a) == 1:
-            return unit
-        if len(a) < 4:
-            return second_unit
-        return a[3]  # 返回分割后的第四个部分作为三级单位
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        return raw_third_unit if raw_third_unit and raw_third_unit.strip() else second_unit
 
     # 应用 get_third_unit 函数生成三级单位列
     df['三级单位'] = df.apply(get_third_unit, axis=1)
 
     # 定义一个函数,用于根据单位名称获取二级组织机构编码
-    def get_area_no(unit):
-        # 如果单位为空或无效,则返回空字符串
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
-        # 如果单位包含特定关键词(如“机动通信局”等),返回固定编码"-11"
-        if any(keyword in unit for keyword in ["机动通信局", "机动局", "传输局", "线路维护中心"]):
+    def get_area_no(first_unit):
+        if first_unit == "机动局":
             return "-11"
-        # 如果单位包含特定关键词(如“省公司本部”等),返回固定编码"-12"
-        if any(keyword in unit for keyword in ["省公司本部", "雄安基地建设部", "华北基地建设部"]):
+        if first_unit in ["省公司本部", "雄安基地建设部", "华北基地建设部"]:
             return "-12"
-        # 遍历second_orgs列表,匹配单位名称并返回对应的id
         for second_org in second_orgs:
-            if second_org.get('name') in unit:
+            if second_org.get('name') in first_unit:
                 return second_org.get('id')
-        # 如果未匹配到任何规则,返回默认编码"-12"
-        return "-12"
+        return ''
 
-    # 将get_area_no函数应用到DataFrame的'原始一级单位'列,生成'二级组织机构编码'列
-    df['二级组织机构编码'] = df['原始一级单位'].apply(get_area_no)
+    df['二级组织机构编码'] = df['一级单位'].apply(get_area_no)
 
-    # 定义一个函数,用于根据组织机构编码获取组织机构名称
-    def get_org_name(org_no):
+    # 用于根据组织机构编码获取组织机构名称
+    def get_org_name(x):
+        org_no = str(x) if pd.notna(x) else ''
         # 如果编码为空或无效,则返回空字符串
-        if pd.isna(org_no) or not org_no or not org_no.strip():
-            return ""
+        if not org_no or not org_no.strip():
+            return ''
         # 在org_map中查找对应编码的组织机构信息,并返回其名称
         po = org_map.get(org_no)
         if po is not None:
             return po.get('name')
-        return ""
+        return ''
 
     # 将get_org_name函数应用到'二级组织机构编码'列,生成'二级组织机构名称'列
     df['二级组织机构名称'] = df['二级组织机构编码'].apply(get_org_name)
@@ -335,61 +323,69 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         area_no = str(x['二级组织机构编码']) if pd.notna(x['二级组织机构编码']) else ""
         area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
-        # 如果二级组织机构编码或名称为空,则返回""
-        if not area_no or not area_name:
-            return ""
-        # 根据不同的二级组织机构名称和单位内容,返回对应的三级组织机构编码
-        if area_name == "石家庄":
-            if "井陉矿区" in unit:
-                return "D0130185"
-            if "井陉" in unit:
-                return "D0130121"
-        if area_name == "秦皇岛":
-            if "北戴河新区" in unit:
-                return "D0130325"
-            if "北戴河" in unit:
-                return "D0130304"
-        if area_name == "邯郸":
-            if "峰峰" in unit:
-                return "D0130406"
-        if area_name == "邢台":
-            if "内丘" in unit:
-                return "D0130523"
-            if "任泽" in unit:
-                return "D0130526"
+        first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
+        second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        if not area_no or not area_no.strip() or not area_name or not area_name.strip():
+            return ''
+        if '华北基地建设部' == first_unit:
+            return 'HE018'
+        if '雄安基地建设部' == first_unit:
+            return 'HE019'
+        if second_unit and second_unit.strip():
+            if area_name == "石家庄":
+                if "井陉矿区" in second_unit:
+                    return "D0130185"
+                if "井陉" in second_unit:
+                    return "D0130121"
+            if area_name == "秦皇岛":
+                if "北戴河新区" in second_unit:
+                    return "D0130325"
+                if "北戴河" in second_unit:
+                    return "D0130304"
+            if area_name == "邯郸":
+                if "峰峰" in second_unit:
+                    return "D0130406"
+            if area_name == "邢台":
+                if "内丘" in second_unit:
+                    return "D0130523"
+                if "任泽" in second_unit:
+                    return "D0130526"
+            if area_name == "省机动局":
+                if "沧州" in second_unit:
+                    return "HECS180"
+                if "唐山" in second_unit:
+                    return "HECS181"
+                if "秦皇岛" in second_unit:
+                    return "HECS182"
+                if "廊坊" in second_unit:
+                    return "HECS183"
+                if "张家口" in second_unit:
+                    return "HECS184"
+                if "邢台" in second_unit:
+                    return "HECS185"
+                if "邯郸" in second_unit:
+                    return "HECS186"
+                if "保定" in second_unit:
+                    return "HECS187"
+                if "石家庄" in second_unit:
+                    return "HECS188"
+                if "承德" in second_unit:
+                    return "HECS189"
+                if "衡水" in second_unit:
+                    return "HECS720"
+                if "雄安" in second_unit:
+                    return "HECS728"
+                return "HECS018"
+            if area_name == "雄安":
+                second_unit = second_unit.replace("雄安新区", "")
+            l3 = third_org_list_map.get(area_no, [])
+            for organization_po in l3:
+                if organization_po.get('name') in second_unit:
+                    return organization_po.get('id')
+        if area_name == '省本部':
+            return 'HE001'
         if area_name == "省机动局":
-            if "沧州" in unit:
-                return "HECS180"
-            if "唐山" in unit:
-                return "HECS181"
-            if "秦皇岛" in unit:
-                return "HECS182"
-            if "廊坊" in unit:
-                return "HECS183"
-            if "张家口" in unit:
-                return "HECS184"
-            if "邢台" in unit:
-                return "HECS185"
-            if "邯郸" in unit:
-                return "HECS186"
-            if "保定" in unit:
-                return "HECS187"
-            if "石家庄" in unit:
-                return "HECS188"
-            if "承德" in unit:
-                return "HECS189"
-            if "衡水" in unit:
-                return "HECS720"
-            if "雄安" in unit:
-                return "HECS728"
             return "HECS018"
-        if area_name == "雄安":
-            unit = unit.replace("雄安新区", "")
-        l3 = third_org_list_map.get(area_no, [])
-        for organization_po in l3:
-            if organization_po.get('name') in unit:
-                return organization_po.get('id')
         if area_name == "沧州":
             return "D0130911"
         if area_name == "唐山":
@@ -414,7 +410,7 @@ def data_process():
             return "D0133001"
         if area_name == "雄安":
             return "D0130830"
-        return "HE001"
+        return ''
 
     # 将get_city_no函数应用到DataFrame的每一行,生成'三级组织机构编码'列
     df['三级组织机构编码'] = df.apply(get_city_no, axis=1)
@@ -426,9 +422,8 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
         city_name = str(x['三级组织机构名称']) if pd.notna(x['三级组织机构名称']) else ""
-        # 如果二级组织机构名称为空,则返回""
-        if not area_name or not area_name.strip():
-            return ""
+        if not area_name or not area_name.strip() or '省本部' == area_name:
+            return ''
         # 根据二级组织机构名称和三级组织机构名称的内容,返回对应的编码
         if area_name == "省机动局" and city_name and city_name.strip():
             if "沧州" in city_name:
@@ -455,6 +450,7 @@ def data_process():
                 return "720"
             if "雄安" in city_name:
                 return "782"
+            return ''
         if "沧州" in area_name:
             return "180"
         if "唐山" in area_name:
@@ -486,24 +482,26 @@ def data_process():
     # 将get_org_name函数应用到'二级组织机构编码2'列,生成'二级组织机构名称2'列
     df['二级组织机构名称2'] = df['二级组织机构编码2'].apply(get_org_name)
 
-    # 定义一个函数,用于根据单位名称获取城市ID
-    def get_city_id(unit):
-        # 如果单位为空或无效,则返回""
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
+    # 获取城市ID
+    def get_city_id(x):
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        unit = f"{raw_first_unit}_{raw_second_unit}_{raw_third_unit}"
+        if not unit or not unit.strip():
+            return ''
         # 遍历cities列表,匹配单位名称并返回对应的城市ID
         for city in cities:
-            if city.get('short_name') and city['short_name'] in unit:
-                return city.get('area_id', "")
-        return ""
+            if city.get('short_name') in unit:
+                return city.get('area_id', '')
+        return ''
 
-    # 将get_city_id函数应用到'原始一级单位'列,生成'city_id'列
-    df['city_id'] = df['原始一级单位'].apply(get_city_id)
+    df['city_id'] = df.apply(get_city_id, axis=1)
 
     # 定义一个函数,用于根据ID获取区域名称
-    def get_area_name(id):
-        # 如果ID为空或无效,则返回""
-        if pd.isna(id) or not id or not id.strip():
+    def get_area_name(x):
+        id = str(x) if pd.notna(x) else ""
+        if not id or not id.strip():
             return ""
         # 在area_map中查找对应ID的区域信息,并返回其名称
         area_po = area_map.get(id)
@@ -519,9 +517,12 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         city_id = str(x['city_id']) if pd.notna(x['city_id']) else ""
         city = str(x['city']) if pd.notna(x['city']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        unit = f"{raw_first_unit}_{raw_second_unit}_{raw_third_unit}"
         # 如果城市ID、城市名称或单位为空,则返回""
-        if not city_id or not city or not unit:
+        if not city_id or not city_id.strip() or not city or not city.strip() or not unit or not unit.strip():
             return ""
         # 根据城市名称和单位内容,返回对应的区县ID
         if city == "石家庄":
@@ -531,15 +532,12 @@ def data_process():
                 return "130121"
         if city == "雄安":
             unit = unit.replace("雄安新区", "")
-        districts = district_list_map.get(city_id)
-        if not districts:
-            return ""
+        districts = district_list_map.get(city_id, [])
         for district in districts:
             if district.get('short_name') in unit:
                 return district.get('area_id')
         return ""
 
-    # 将get_district_id函数应用到DataFrame的每一行,生成'district_id'列
     df['district_id'] = df.apply(get_district_id, axis=1)
     # 将get_area_name函数应用到'district_id'列,生成'district'列
     df['district'] = df['district_id'].apply(get_area_name)

+ 150 - 151
car/car-guo-jian/car_guo_jian.py

@@ -51,6 +51,27 @@ def data_process():
         r"[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z][A-Z0-9]{4}[A-Z0-9挂学警港澳])"
     )
 
+    # 一级单位字典
+    first_unit_map = {
+        "保定市分公司": "保定",
+        "沧州市分公司": "沧州",
+        "承德市分公司": "承德",
+        "邯郸市分公司": "邯郸",
+        "河北省分公司线路维护中心": "机动局",
+        "河北省机动通信局": "机动局",
+        "衡水市分公司": "衡水",
+        "华北基地建设部": "华北基地建设部",
+        "廊坊市分公司": "廊坊",
+        "秦皇岛市分公司": "秦皇岛",
+        "省公司本部": "省公司本部",
+        "石家庄市分公司": "石家庄",
+        "唐山市分公司": "唐山",
+        "邢台市分公司": "邢台",
+        "雄安基地建设部": "雄安基地建设部",
+        "雄安新区分公司": "雄安",
+        "张家口市分公司": "张家口"
+    }
+
     # 定义二级行政区划映射表(地级市及其下属区县)
     er_ji_map = {
         "石家庄": ["鹿泉", "藁城", "栾城", "井陉矿区", "井陉", "无极", "正定", "元氏", "新乐", "晋州", "平山", "灵寿",
@@ -207,125 +228,92 @@ def data_process():
     # 应用 che_pai_fail 函数生成车牌匹配失败标记列
     df['车牌匹配失败'] = df['车牌号'].apply(che_pai_fail)
 
-    # 定义函数,用于提取一级单位
-    def get_first_unit(unit):
-        # 如果单位为空或无效,则返回空字符串
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
-        # 根据单位名称中的关键词返回对应的一级单位
-        if "机动通信局" in unit or "机动局" in unit or "传输局" in unit or "线路维护中心" in unit:
-            return "机动局"
-        if "雄安基地建设部" in unit:
-            return "雄安基地建设部"
-        if "华北基地建设部" in unit:
-            return "华北基地建设部"
-        # 遍历 er_ji_map 的键,寻找匹配的一级单位
-        for yj in er_ji_map.keys():
-            if yj in unit:
-                return yj
-        return "省公司本部"  # 默认返回省公司本部
+    # 获取一级单位
+    def get_first_unit(x):
+        raw_che_pai = x['原始车牌号']
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        if not raw_first_unit or not raw_first_unit.strip():
+            raise RuntimeError(f"一级单位为空:{raw_che_pai}")
+        if raw_first_unit in first_unit_map:
+            return first_unit_map.get(raw_first_unit)
+        raise RuntimeError(f"一级单位匹配失败:{raw_che_pai} {raw_first_unit}")
 
     # 应用 get_first_unit 函数生成一级单位列
-    df['一级单位'] = df['原始一级单位'].apply(get_first_unit)
+    df['一级单位'] = df.apply(get_first_unit, axis=1)
 
-    # 定义函数,用于提取二级单位
+    # 获取二级单位
     def get_second_unit(x):
-        # 获取一级单位和原始二级单位
         first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
-        # 如果二级单位为空或无效,则返回一级单位
-        if not unit or not unit.strip():
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        if first_unit in ["华北基地建设部", "雄安基地建设部", "省公司本部"]:
             return first_unit
-        # 如果一级单位是省公司本部,则返回省公司本部
-        if first_unit == "省公司本部":
-            return first_unit
-        # 如果一级单位是机动局,则根据单位名称进一步细化
+        if not raw_second_unit or not raw_second_unit.strip():
+            return f"{first_unit}本部"
         if first_unit == "机动局":
             for yj in er_ji_map.keys():
-                if yj in unit:
+                if yj in raw_second_unit:
                     return f"机动局{yj}"
             return "机动局本部"
-        # 根据特定城市和关键词返回对应的二级单位
         if first_unit == "石家庄":
-            if "开发区" in unit:
+            if "开发区" in raw_second_unit:
                 return "石家庄开发区"
         if first_unit == "廊坊":
-            if "开发区" in unit:
+            if "开发区" in raw_second_unit:
                 return "廊坊开发区"
         if first_unit == "邢台":
-            if "内丘" in unit:
+            if "内丘" in raw_second_unit:
                 return "内邱"
-            if "任泽" in unit:
+            if "任泽" in raw_second_unit:
                 return "任县"
         if first_unit == "唐山":
-            if "高开区" in unit:
+            if "高开区" in raw_second_unit:
                 return "唐山高开区"
-            if "滦州" in unit:
+            if "滦州" in raw_second_unit:
                 return "滦县"
-        # 根据 er_ji_map 获取二级单位
-        ejs = er_ji_map.get(first_unit)
-        if not ejs:
-            return first_unit
+        ejs = er_ji_map.get(first_unit, [])
         if first_unit == "雄安":
-            unit = unit.replace("雄安新区", "")
+            raw_second_unit = raw_second_unit.replace("雄安新区", "")
         for ej in ejs:
-            if ej in unit:
+            if ej in raw_second_unit:
                 return ej
-        return f"{first_unit}本部"  # 默认返回一级单位本部
+        return f"{first_unit}本部"
 
     # 应用 get_second_unit 函数生成二级单位列
     df['二级单位'] = df.apply(get_second_unit, axis=1)
 
-    # 定义函数,用于提取三级单位
+    # 取三级单位
     def get_third_unit(x):
-        # 获取二级单位和原始三级单位
         second_unit = str(x['二级单位']) if pd.notna(x['二级单位']) else ""
-        unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
-        # 如果三级单位为空或无效,则返回二级单位
-        if not unit or not unit.strip():
-            return second_unit
-        # 按下划线分割三级单位名称
-        a = unit.split("_")
-        if len(a) == 1:
-            return unit
-        if len(a) < 4:
-            return second_unit
-        return a[3]  # 返回分割后的第四个部分作为三级单位
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        return raw_third_unit if raw_third_unit and raw_third_unit.strip() else second_unit
 
     # 应用 get_third_unit 函数生成三级单位列
     df['三级单位'] = df.apply(get_third_unit, axis=1)
 
     # 定义一个函数,用于根据单位名称获取二级组织机构编码
-    def get_area_no(unit):
-        # 如果单位为空或无效,则返回空字符串
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
-        # 如果单位包含特定关键词(如“机动通信局”等),返回固定编码"-11"
-        if any(keyword in unit for keyword in ["机动通信局", "机动局", "传输局", "线路维护中心"]):
+    def get_area_no(first_unit):
+        if first_unit == "机动局":
             return "-11"
-        # 如果单位包含特定关键词(如“省公司本部”等),返回固定编码"-12"
-        if any(keyword in unit for keyword in ["省公司本部", "雄安基地建设部", "华北基地建设部"]):
+        if first_unit in ["省公司本部", "雄安基地建设部", "华北基地建设部"]:
             return "-12"
-        # 遍历second_orgs列表,匹配单位名称并返回对应的id
         for second_org in second_orgs:
-            if second_org.get('name') in unit:
+            if second_org.get('name') in first_unit:
                 return second_org.get('id')
-        # 如果未匹配到任何规则,返回默认编码"-12"
-        return "-12"
+        return ''
 
-    # 将get_area_no函数应用到DataFrame的'原始一级单位'列,生成'二级组织机构编码'列
-    df['二级组织机构编码'] = df['原始一级单位'].apply(get_area_no)
+    df['二级组织机构编码'] = df['一级单位'].apply(get_area_no)
 
-    # 定义一个函数,用于根据组织机构编码获取组织机构名称
-    def get_org_name(org_no):
+    # 用于根据组织机构编码获取组织机构名称
+    def get_org_name(x):
+        org_no = str(x) if pd.notna(x) else ''
         # 如果编码为空或无效,则返回空字符串
-        if pd.isna(org_no) or not org_no or not org_no.strip():
-            return ""
+        if not org_no or not org_no.strip():
+            return ''
         # 在org_map中查找对应编码的组织机构信息,并返回其名称
         po = org_map.get(org_no)
         if po is not None:
             return po.get('name')
-        return ""
+        return ''
 
     # 将get_org_name函数应用到'二级组织机构编码'列,生成'二级组织机构名称'列
     df['二级组织机构名称'] = df['二级组织机构编码'].apply(get_org_name)
@@ -335,61 +323,69 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         area_no = str(x['二级组织机构编码']) if pd.notna(x['二级组织机构编码']) else ""
         area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
-        # 如果二级组织机构编码或名称为空,则返回""
-        if not area_no or not area_name:
-            return ""
-        # 根据不同的二级组织机构名称和单位内容,返回对应的三级组织机构编码
-        if area_name == "石家庄":
-            if "井陉矿区" in unit:
-                return "D0130185"
-            if "井陉" in unit:
-                return "D0130121"
-        if area_name == "秦皇岛":
-            if "北戴河新区" in unit:
-                return "D0130325"
-            if "北戴河" in unit:
-                return "D0130304"
-        if area_name == "邯郸":
-            if "峰峰" in unit:
-                return "D0130406"
-        if area_name == "邢台":
-            if "内丘" in unit:
-                return "D0130523"
-            if "任泽" in unit:
-                return "D0130526"
+        first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
+        second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        if not area_no or not area_no.strip() or not area_name or not area_name.strip():
+            return ''
+        if '华北基地建设部' == first_unit:
+            return 'HE018'
+        if '雄安基地建设部' == first_unit:
+            return 'HE019'
+        if second_unit and second_unit.strip():
+            if area_name == "石家庄":
+                if "井陉矿区" in second_unit:
+                    return "D0130185"
+                if "井陉" in second_unit:
+                    return "D0130121"
+            if area_name == "秦皇岛":
+                if "北戴河新区" in second_unit:
+                    return "D0130325"
+                if "北戴河" in second_unit:
+                    return "D0130304"
+            if area_name == "邯郸":
+                if "峰峰" in second_unit:
+                    return "D0130406"
+            if area_name == "邢台":
+                if "内丘" in second_unit:
+                    return "D0130523"
+                if "任泽" in second_unit:
+                    return "D0130526"
+            if area_name == "省机动局":
+                if "沧州" in second_unit:
+                    return "HECS180"
+                if "唐山" in second_unit:
+                    return "HECS181"
+                if "秦皇岛" in second_unit:
+                    return "HECS182"
+                if "廊坊" in second_unit:
+                    return "HECS183"
+                if "张家口" in second_unit:
+                    return "HECS184"
+                if "邢台" in second_unit:
+                    return "HECS185"
+                if "邯郸" in second_unit:
+                    return "HECS186"
+                if "保定" in second_unit:
+                    return "HECS187"
+                if "石家庄" in second_unit:
+                    return "HECS188"
+                if "承德" in second_unit:
+                    return "HECS189"
+                if "衡水" in second_unit:
+                    return "HECS720"
+                if "雄安" in second_unit:
+                    return "HECS728"
+                return "HECS018"
+            if area_name == "雄安":
+                second_unit = second_unit.replace("雄安新区", "")
+            l3 = third_org_list_map.get(area_no, [])
+            for organization_po in l3:
+                if organization_po.get('name') in second_unit:
+                    return organization_po.get('id')
+        if area_name == '省本部':
+            return 'HE001'
         if area_name == "省机动局":
-            if "沧州" in unit:
-                return "HECS180"
-            if "唐山" in unit:
-                return "HECS181"
-            if "秦皇岛" in unit:
-                return "HECS182"
-            if "廊坊" in unit:
-                return "HECS183"
-            if "张家口" in unit:
-                return "HECS184"
-            if "邢台" in unit:
-                return "HECS185"
-            if "邯郸" in unit:
-                return "HECS186"
-            if "保定" in unit:
-                return "HECS187"
-            if "石家庄" in unit:
-                return "HECS188"
-            if "承德" in unit:
-                return "HECS189"
-            if "衡水" in unit:
-                return "HECS720"
-            if "雄安" in unit:
-                return "HECS728"
             return "HECS018"
-        if area_name == "雄安":
-            unit = unit.replace("雄安新区", "")
-        l3 = third_org_list_map.get(area_no, [])
-        for organization_po in l3:
-            if organization_po.get('name') in unit:
-                return organization_po.get('id')
         if area_name == "沧州":
             return "D0130911"
         if area_name == "唐山":
@@ -414,7 +410,7 @@ def data_process():
             return "D0133001"
         if area_name == "雄安":
             return "D0130830"
-        return "HE001"
+        return ''
 
     # 将get_city_no函数应用到DataFrame的每一行,生成'三级组织机构编码'列
     df['三级组织机构编码'] = df.apply(get_city_no, axis=1)
@@ -426,9 +422,8 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
         city_name = str(x['三级组织机构名称']) if pd.notna(x['三级组织机构名称']) else ""
-        # 如果二级组织机构名称为空,则返回""
-        if not area_name or not area_name.strip():
-            return ""
+        if not area_name or not area_name.strip() or '省本部' == area_name:
+            return ''
         # 根据二级组织机构名称和三级组织机构名称的内容,返回对应的编码
         if area_name == "省机动局" and city_name and city_name.strip():
             if "沧州" in city_name:
@@ -455,6 +450,7 @@ def data_process():
                 return "720"
             if "雄安" in city_name:
                 return "782"
+            return ''
         if "沧州" in area_name:
             return "180"
         if "唐山" in area_name:
@@ -486,24 +482,26 @@ def data_process():
     # 将get_org_name函数应用到'二级组织机构编码2'列,生成'二级组织机构名称2'列
     df['二级组织机构名称2'] = df['二级组织机构编码2'].apply(get_org_name)
 
-    # 定义一个函数,用于根据单位名称获取城市ID
-    def get_city_id(unit):
-        # 如果单位为空或无效,则返回""
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
+    # 获取城市ID
+    def get_city_id(x):
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        unit = f"{raw_first_unit}_{raw_second_unit}_{raw_third_unit}"
+        if not unit or not unit.strip():
+            return ''
         # 遍历cities列表,匹配单位名称并返回对应的城市ID
         for city in cities:
-            if city.get('short_name') and city['short_name'] in unit:
-                return city.get('area_id', "")
-        return ""
+            if city.get('short_name') in unit:
+                return city.get('area_id', '')
+        return ''
 
-    # 将get_city_id函数应用到'原始一级单位'列,生成'city_id'列
-    df['city_id'] = df['原始一级单位'].apply(get_city_id)
+    df['city_id'] = df.apply(get_city_id, axis=1)
 
     # 定义一个函数,用于根据ID获取区域名称
-    def get_area_name(id):
-        # 如果ID为空或无效,则返回""
-        if pd.isna(id) or not id or not id.strip():
+    def get_area_name(x):
+        id = str(x) if pd.notna(x) else ""
+        if not id or not id.strip():
             return ""
         # 在area_map中查找对应ID的区域信息,并返回其名称
         area_po = area_map.get(id)
@@ -519,9 +517,12 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         city_id = str(x['city_id']) if pd.notna(x['city_id']) else ""
         city = str(x['city']) if pd.notna(x['city']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        unit = f"{raw_first_unit}_{raw_second_unit}_{raw_third_unit}"
         # 如果城市ID、城市名称或单位为空,则返回""
-        if not city_id or not city or not unit:
+        if not city_id or not city_id.strip() or not city or not city.strip() or not unit or not unit.strip():
             return ""
         # 根据城市名称和单位内容,返回对应的区县ID
         if city == "石家庄":
@@ -531,15 +532,12 @@ def data_process():
                 return "130121"
         if city == "雄安":
             unit = unit.replace("雄安新区", "")
-        districts = district_list_map.get(city_id)
-        if not districts:
-            return ""
+        districts = district_list_map.get(city_id, [])
         for district in districts:
             if district.get('short_name') in unit:
                 return district.get('area_id')
         return ""
 
-    # 将get_district_id函数应用到DataFrame的每一行,生成'district_id'列
     df['district_id'] = df.apply(get_district_id, axis=1)
     # 将get_area_name函数应用到'district_id'列,生成'district'列
     df['district'] = df['district_id'].apply(get_area_name)
@@ -582,7 +580,8 @@ def data_import():
         capture_output=True,  # 捕获标准输出和标准错误
     )
     # 打印命令执行的结果,包括返回码、标准输出和标准错误
-    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout, completed_process.stderr)
+    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
+                completed_process.stderr)
     # 定义正则表达式,用于匹配标准输出中的 COPY 结果
     p = re.compile(r"^(COPY) (\d+)$")
     count = None  # 初始化计数变量

+ 156 - 157
car/car-wei-zhang/car_wei_zhang.py

@@ -20,11 +20,11 @@ ssh_password = '(l4w0ST_'  # 定义登录远程主机的密码
 remote_dir_path = '/data/history/car/wei-zhang/'
 # 数据库连接信息
 db_host = "172.16.107.5"  # 数据库主机地址
-db_port = 5432         # 数据库端口号
+db_port = 5432  # 数据库端口号
 db_username = "finance"  # 数据库用户名
 db_password = "Finance@unicom23"  # 数据库密码
-dbname = "financialdb"       # 数据库名称
-conn_info= f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
+dbname = "financialdb"  # 数据库名称
+conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
 # 获取当前日期,并计算上个月的第一天
 today = datetime.today()
 start_date = today - relativedelta(months=1, day=1)
@@ -51,6 +51,27 @@ def data_process():
         r"[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z][A-Z0-9]{4}[A-Z0-9挂学警港澳])"
     )
 
+    # 一级单位字典
+    first_unit_map = {
+        "保定市分公司": "保定",
+        "沧州市分公司": "沧州",
+        "承德市分公司": "承德",
+        "邯郸市分公司": "邯郸",
+        "河北省分公司线路维护中心": "机动局",
+        "河北省机动通信局": "机动局",
+        "衡水市分公司": "衡水",
+        "华北基地建设部": "华北基地建设部",
+        "廊坊市分公司": "廊坊",
+        "秦皇岛市分公司": "秦皇岛",
+        "省公司本部": "省公司本部",
+        "石家庄市分公司": "石家庄",
+        "唐山市分公司": "唐山",
+        "邢台市分公司": "邢台",
+        "雄安基地建设部": "雄安基地建设部",
+        "雄安新区分公司": "雄安",
+        "张家口市分公司": "张家口"
+    }
+
     # 定义二级行政区划映射表(地级市及其下属区县)
     er_ji_map = {
         "石家庄": ["鹿泉", "藁城", "栾城", "井陉矿区", "井陉", "无极", "正定", "元氏", "新乐", "晋州", "平山", "灵寿",
@@ -205,125 +226,92 @@ def data_process():
     # 应用 che_pai_fail 函数生成车牌匹配失败标记列
     df['车牌匹配失败'] = df['车牌号'].apply(che_pai_fail)
 
-    # 定义函数,用于提取一级单位
-    def get_first_unit(unit):
-        # 如果单位为空或无效,则返回空字符串
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
-        # 根据单位名称中的关键词返回对应的一级单位
-        if "机动通信局" in unit or "机动局" in unit or "传输局" in unit or "线路维护中心" in unit:
-            return "机动局"
-        if "雄安基地建设部" in unit:
-            return "雄安基地建设部"
-        if "华北基地建设部" in unit:
-            return "华北基地建设部"
-        # 遍历 er_ji_map 的键,寻找匹配的一级单位
-        for yj in er_ji_map.keys():
-            if yj in unit:
-                return yj
-        return "省公司本部"  # 默认返回省公司本部
+    # 获取一级单位
+    def get_first_unit(x):
+        raw_che_pai = x['原始车牌号']
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        if not raw_first_unit or not raw_first_unit.strip():
+            raise RuntimeError(f"一级单位为空:{raw_che_pai}")
+        if raw_first_unit in first_unit_map:
+            return first_unit_map.get(raw_first_unit)
+        raise RuntimeError(f"一级单位匹配失败:{raw_che_pai} {raw_first_unit}")
 
     # 应用 get_first_unit 函数生成一级单位列
-    df['一级单位'] = df['原始一级单位'].apply(get_first_unit)
+    df['一级单位'] = df.apply(get_first_unit, axis=1)
 
-    # 定义函数,用于提取二级单位
+    # 获取二级单位
     def get_second_unit(x):
-        # 获取一级单位和原始二级单位
         first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
-        # 如果二级单位为空或无效,则返回一级单位
-        if not unit or not unit.strip():
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        if first_unit in ["华北基地建设部", "雄安基地建设部", "省公司本部"]:
             return first_unit
-        # 如果一级单位是省公司本部,则返回省公司本部
-        if first_unit == "省公司本部":
-            return first_unit
-        # 如果一级单位是机动局,则根据单位名称进一步细化
+        if not raw_second_unit or not raw_second_unit.strip():
+            return f"{first_unit}本部"
         if first_unit == "机动局":
             for yj in er_ji_map.keys():
-                if yj in unit:
+                if yj in raw_second_unit:
                     return f"机动局{yj}"
             return "机动局本部"
-        # 根据特定城市和关键词返回对应的二级单位
         if first_unit == "石家庄":
-            if "开发区" in unit:
+            if "开发区" in raw_second_unit:
                 return "石家庄开发区"
         if first_unit == "廊坊":
-            if "开发区" in unit:
+            if "开发区" in raw_second_unit:
                 return "廊坊开发区"
         if first_unit == "邢台":
-            if "内丘" in unit:
+            if "内丘" in raw_second_unit:
                 return "内邱"
-            if "任泽" in unit:
+            if "任泽" in raw_second_unit:
                 return "任县"
         if first_unit == "唐山":
-            if "高开区" in unit:
+            if "高开区" in raw_second_unit:
                 return "唐山高开区"
-            if "滦州" in unit:
+            if "滦州" in raw_second_unit:
                 return "滦县"
-        # 根据 er_ji_map 获取二级单位
-        ejs = er_ji_map.get(first_unit)
-        if not ejs:
-            return first_unit
+        ejs = er_ji_map.get(first_unit, [])
         if first_unit == "雄安":
-            unit = unit.replace("雄安新区", "")
+            raw_second_unit = raw_second_unit.replace("雄安新区", "")
         for ej in ejs:
-            if ej in unit:
+            if ej in raw_second_unit:
                 return ej
-        return f"{first_unit}本部"  # 默认返回一级单位本部
+        return f"{first_unit}本部"
 
     # 应用 get_second_unit 函数生成二级单位列
     df['二级单位'] = df.apply(get_second_unit, axis=1)
 
-    # 定义函数,用于提取三级单位
+    # 取三级单位
     def get_third_unit(x):
-        # 获取二级单位和原始三级单位
         second_unit = str(x['二级单位']) if pd.notna(x['二级单位']) else ""
-        unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
-        # 如果三级单位为空或无效,则返回二级单位
-        if not unit or not unit.strip():
-            return second_unit
-        # 按下划线分割三级单位名称
-        a = unit.split("_")
-        if len(a) == 1:
-            return unit
-        if len(a) < 4:
-            return second_unit
-        return a[3]  # 返回分割后的第四个部分作为三级单位
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        return raw_third_unit if raw_third_unit and raw_third_unit.strip() else second_unit
 
     # 应用 get_third_unit 函数生成三级单位列
     df['三级单位'] = df.apply(get_third_unit, axis=1)
 
     # 定义一个函数,用于根据单位名称获取二级组织机构编码
-    def get_area_no(unit):
-        # 如果单位为空或无效,则返回空字符串
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
-        # 如果单位包含特定关键词(如“机动通信局”等),返回固定编码"-11"
-        if any(keyword in unit for keyword in ["机动通信局", "机动局", "传输局", "线路维护中心"]):
+    def get_area_no(first_unit):
+        if first_unit == "机动局":
             return "-11"
-        # 如果单位包含特定关键词(如“省公司本部”等),返回固定编码"-12"
-        if any(keyword in unit for keyword in ["省公司本部", "雄安基地建设部", "华北基地建设部"]):
+        if first_unit in ["省公司本部", "雄安基地建设部", "华北基地建设部"]:
             return "-12"
-        # 遍历second_orgs列表,匹配单位名称并返回对应的id
         for second_org in second_orgs:
-            if second_org.get('name') in unit:
+            if second_org.get('name') in first_unit:
                 return second_org.get('id')
-        # 如果未匹配到任何规则,返回默认编码"-12"
-        return "-12"
+        return ''
 
-    # 将get_area_no函数应用到DataFrame的'原始一级单位'列,生成'二级组织机构编码'列
-    df['二级组织机构编码'] = df['原始一级单位'].apply(get_area_no)
+    df['二级组织机构编码'] = df['一级单位'].apply(get_area_no)
 
-    # 定义一个函数,用于根据组织机构编码获取组织机构名称
-    def get_org_name(org_no):
+    # 用于根据组织机构编码获取组织机构名称
+    def get_org_name(x):
+        org_no = str(x) if pd.notna(x) else ''
         # 如果编码为空或无效,则返回空字符串
-        if pd.isna(org_no) or not org_no or not org_no.strip():
-            return ""
+        if not org_no or not org_no.strip():
+            return ''
         # 在org_map中查找对应编码的组织机构信息,并返回其名称
         po = org_map.get(org_no)
         if po is not None:
             return po.get('name')
-        return ""
+        return ''
 
     # 将get_org_name函数应用到'二级组织机构编码'列,生成'二级组织机构名称'列
     df['二级组织机构名称'] = df['二级组织机构编码'].apply(get_org_name)
@@ -333,61 +321,69 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         area_no = str(x['二级组织机构编码']) if pd.notna(x['二级组织机构编码']) else ""
         area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
-        # 如果二级组织机构编码或名称为空,则返回""
-        if not area_no or not area_name:
-            return ""
-        # 根据不同的二级组织机构名称和单位内容,返回对应的三级组织机构编码
-        if area_name == "石家庄":
-            if "井陉矿区" in unit:
-                return "D0130185"
-            if "井陉" in unit:
-                return "D0130121"
-        if area_name == "秦皇岛":
-            if "北戴河新区" in unit:
-                return "D0130325"
-            if "北戴河" in unit:
-                return "D0130304"
-        if area_name == "邯郸":
-            if "峰峰" in unit:
-                return "D0130406"
-        if area_name == "邢台":
-            if "内丘" in unit:
-                return "D0130523"
-            if "任泽" in unit:
-                return "D0130526"
+        first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
+        second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        if not area_no or not area_no.strip() or not area_name or not area_name.strip():
+            return ''
+        if '华北基地建设部' == first_unit:
+            return 'HE018'
+        if '雄安基地建设部' == first_unit:
+            return 'HE019'
+        if second_unit and second_unit.strip():
+            if area_name == "石家庄":
+                if "井陉矿区" in second_unit:
+                    return "D0130185"
+                if "井陉" in second_unit:
+                    return "D0130121"
+            if area_name == "秦皇岛":
+                if "北戴河新区" in second_unit:
+                    return "D0130325"
+                if "北戴河" in second_unit:
+                    return "D0130304"
+            if area_name == "邯郸":
+                if "峰峰" in second_unit:
+                    return "D0130406"
+            if area_name == "邢台":
+                if "内丘" in second_unit:
+                    return "D0130523"
+                if "任泽" in second_unit:
+                    return "D0130526"
+            if area_name == "省机动局":
+                if "沧州" in second_unit:
+                    return "HECS180"
+                if "唐山" in second_unit:
+                    return "HECS181"
+                if "秦皇岛" in second_unit:
+                    return "HECS182"
+                if "廊坊" in second_unit:
+                    return "HECS183"
+                if "张家口" in second_unit:
+                    return "HECS184"
+                if "邢台" in second_unit:
+                    return "HECS185"
+                if "邯郸" in second_unit:
+                    return "HECS186"
+                if "保定" in second_unit:
+                    return "HECS187"
+                if "石家庄" in second_unit:
+                    return "HECS188"
+                if "承德" in second_unit:
+                    return "HECS189"
+                if "衡水" in second_unit:
+                    return "HECS720"
+                if "雄安" in second_unit:
+                    return "HECS728"
+                return "HECS018"
+            if area_name == "雄安":
+                second_unit = second_unit.replace("雄安新区", "")
+            l3 = third_org_list_map.get(area_no, [])
+            for organization_po in l3:
+                if organization_po.get('name') in second_unit:
+                    return organization_po.get('id')
+        if area_name == '省本部':
+            return 'HE001'
         if area_name == "省机动局":
-            if "沧州" in unit:
-                return "HECS180"
-            if "唐山" in unit:
-                return "HECS181"
-            if "秦皇岛" in unit:
-                return "HECS182"
-            if "廊坊" in unit:
-                return "HECS183"
-            if "张家口" in unit:
-                return "HECS184"
-            if "邢台" in unit:
-                return "HECS185"
-            if "邯郸" in unit:
-                return "HECS186"
-            if "保定" in unit:
-                return "HECS187"
-            if "石家庄" in unit:
-                return "HECS188"
-            if "承德" in unit:
-                return "HECS189"
-            if "衡水" in unit:
-                return "HECS720"
-            if "雄安" in unit:
-                return "HECS728"
             return "HECS018"
-        if area_name == "雄安":
-            unit = unit.replace("雄安新区", "")
-        l3 = third_org_list_map.get(area_no, [])
-        for organization_po in l3:
-            if organization_po.get('name') in unit:
-                return organization_po.get('id')
         if area_name == "沧州":
             return "D0130911"
         if area_name == "唐山":
@@ -412,7 +408,7 @@ def data_process():
             return "D0133001"
         if area_name == "雄安":
             return "D0130830"
-        return "HE001"
+        return ''
 
     # 将get_city_no函数应用到DataFrame的每一行,生成'三级组织机构编码'列
     df['三级组织机构编码'] = df.apply(get_city_no, axis=1)
@@ -424,9 +420,8 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
         city_name = str(x['三级组织机构名称']) if pd.notna(x['三级组织机构名称']) else ""
-        # 如果二级组织机构名称为空,则返回""
-        if not area_name or not area_name.strip():
-            return ""
+        if not area_name or not area_name.strip() or '省本部' == area_name:
+            return ''
         # 根据二级组织机构名称和三级组织机构名称的内容,返回对应的编码
         if area_name == "省机动局" and city_name and city_name.strip():
             if "沧州" in city_name:
@@ -453,6 +448,7 @@ def data_process():
                 return "720"
             if "雄安" in city_name:
                 return "782"
+            return ''
         if "沧州" in area_name:
             return "180"
         if "唐山" in area_name:
@@ -484,24 +480,26 @@ def data_process():
     # 将get_org_name函数应用到'二级组织机构编码2'列,生成'二级组织机构名称2'列
     df['二级组织机构名称2'] = df['二级组织机构编码2'].apply(get_org_name)
 
-    # 定义一个函数,用于根据单位名称获取城市ID
-    def get_city_id(unit):
-        # 如果单位为空或无效,则返回""
-        if pd.isna(unit) or not unit or not unit.strip():
-            return ""
+    # 获取城市ID
+    def get_city_id(x):
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        unit = f"{raw_first_unit}_{raw_second_unit}_{raw_third_unit}"
+        if not unit or not unit.strip():
+            return ''
         # 遍历cities列表,匹配单位名称并返回对应的城市ID
         for city in cities:
-            if city.get('short_name') and city['short_name'] in unit:
-                return city.get('area_id', "")
-        return ""
+            if city.get('short_name') in unit:
+                return city.get('area_id', '')
+        return ''
 
-    # 将get_city_id函数应用到'原始一级单位'列,生成'city_id'列
-    df['city_id'] = df['原始一级单位'].apply(get_city_id)
+    df['city_id'] = df.apply(get_city_id, axis=1)
 
     # 定义一个函数,用于根据ID获取区域名称
-    def get_area_name(id):
-        # 如果ID为空或无效,则返回""
-        if pd.isna(id) or not id or not id.strip():
+    def get_area_name(x):
+        id = str(x) if pd.notna(x) else ""
+        if not id or not id.strip():
             return ""
         # 在area_map中查找对应ID的区域信息,并返回其名称
         area_po = area_map.get(id)
@@ -517,9 +515,12 @@ def data_process():
         # 获取相关字段值,如果为空则设置为""
         city_id = str(x['city_id']) if pd.notna(x['city_id']) else ""
         city = str(x['city']) if pd.notna(x['city']) else ""
-        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_first_unit = str(x['原始一级单位']) if pd.notna(x['原始一级单位']) else ""
+        raw_second_unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        raw_third_unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        unit = f"{raw_first_unit}_{raw_second_unit}_{raw_third_unit}"
         # 如果城市ID、城市名称或单位为空,则返回""
-        if not city_id or not city or not unit:
+        if not city_id or not city_id.strip() or not city or not city.strip() or not unit or not unit.strip():
             return ""
         # 根据城市名称和单位内容,返回对应的区县ID
         if city == "石家庄":
@@ -529,15 +530,12 @@ def data_process():
                 return "130121"
         if city == "雄安":
             unit = unit.replace("雄安新区", "")
-        districts = district_list_map.get(city_id)
-        if not districts:
-            return ""
+        districts = district_list_map.get(city_id, [])
         for district in districts:
             if district.get('short_name') in unit:
                 return district.get('area_id')
         return ""
 
-    # 将get_district_id函数应用到DataFrame的每一行,生成'district_id'列
     df['district_id'] = df.apply(get_district_id, axis=1)
     # 将get_area_name函数应用到'district_id'列,生成'district'列
     df['district'] = df['district_id'].apply(get_area_name)
@@ -578,13 +576,14 @@ def data_import():
     logger.info("command: {}", command)
     # 使用 subprocess 模块运行 PowerShell 命令,并捕获输出
     completed_process = subprocess.run(
-        command,             # 执行的命令
-        check=False,          # 如果命令执行失败,不抛出异常
-        text=True,           # 将输出作为字符串处理
+        command,  # 执行的命令
+        check=False,  # 如果命令执行失败,不抛出异常
+        text=True,  # 将输出作为字符串处理
         capture_output=True,  # 捕获标准输出和标准错误
     )
     # 打印命令执行的结果,包括返回码、标准输出和标准错误
-    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout, completed_process.stderr)
+    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
+                completed_process.stderr)
     # 定义正则表达式,用于匹配标准输出中的 COPY 结果
     p = re.compile(r"^(COPY) (\d+)$")
     count = None  # 初始化计数变量