house_building.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803
  1. """不动产建筑数据处理
  2. """
  3. # 导入必要的库
  4. import re # 正则表达式库,用于字符串处理
  5. from datetime import datetime # 日期时间库,用于处理日期和时间
  6. from dateutil.relativedelta import relativedelta # 日期时间相对偏移库,用于计算相对日期
  7. from decimal import Decimal # 高精度小数库,用于精确的数值计算
  8. from loguru import logger # 日志库,用于记录日志信息
  9. import pandas as pd # 数据分析库,用于处理数据框
  10. import psycopg # PostgreSQL数据库连接库,用于与PostgreSQL交互
  11. import subprocess
  12. import paramiko
  13. # 配置日志记录器,将日志写入文件a.log
  14. logger.add(sink='a.log')
  15. ssh_hostname = '172.16.107.4' # 定义远程主机地址
  16. ssh_port = 22 # 定义SSH服务的端口号
  17. ssh_username = 'app' # 定义登录远程主机的用户名
  18. ssh_password = '(l4w0ST_' # 定义登录远程主机的密码
  19. # 服务器文件夹路径
  20. remote_dir_path = '/data/history/house/building/'
  21. # 数据库连接信息
  22. db_host = "172.16.107.5" # 数据库主机地址
  23. db_port = 5432 # 数据库端口号
  24. db_username = "finance" # 数据库用户名
  25. db_password = "Finance@unicom23" # 数据库密码
  26. dbname = "financialdb" # 数据库名称
  27. conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
  28. # 获取当前日期,并计算上个月的第一天
  29. today = datetime.today()
  30. start_date = today - relativedelta(months=1, day=1)
  31. year_month = start_date.strftime('%Y%m')
  32. # 数据文件路径
  33. input_path = 'data.xlsx'
  34. # 输出文件路径
  35. output_path = 'output.csv'
  36. def data_process():
  37. # 初始化全局变量,用于存储组织、区域等映射关系
  38. org_map = {} # 组织ID到组织信息的映射
  39. third_org_list_map = {} # 二级组织ID到其下属三级组织列表的映射
  40. area_map = {} # 区域ID到区域信息的映射
  41. districts_list_map = {} # 城市ID到其下属区县列表的映射
  42. # 连接到PostgreSQL数据库,并获取组织和区域数据
  43. with psycopg.connect(
  44. conninfo=conn_info,
  45. row_factory=psycopg.rows.dict_row
  46. ) as conn:
  47. with conn.cursor() as curs:
  48. # 查询所有一级组织(grade=1)
  49. sql = """
  50. select * from common.organization where grade = 1
  51. """
  52. logger.info(f"sql: {sql}")
  53. curs.execute(sql)
  54. second_orgs = curs.fetchall()
  55. for x in second_orgs:
  56. third_org_list_map[x['id']] = [] # 初始化每个二级组织的三级组织列表为空
  57. # 查询所有组织
  58. sql = """
  59. select * from common.organization
  60. """
  61. logger.info(f"sql: {sql}")
  62. curs.execute(sql)
  63. orgs = curs.fetchall()
  64. for x in orgs:
  65. if x['parent_id'] in third_org_list_map:
  66. third_org_list_map[x['parent_id']].append(x) # 将三级组织添加到对应的二级组织列表中
  67. org_map[x['id']] = x # 构建组织ID到组织信息的映射
  68. # 查询所有省级区域(area_grade=1)
  69. sql = """
  70. select * from common.area where area_grade = 1 order by area_id
  71. """
  72. logger.info(f"sql: {sql}")
  73. curs.execute(sql)
  74. cities = curs.fetchall()
  75. for x in cities:
  76. districts_list_map[x['area_id']] = [] # 初始化每个城市的区县列表为空
  77. # 查询所有区域
  78. sql = """
  79. select * from common.area
  80. """
  81. logger.info(f"sql: {sql}")
  82. curs.execute(sql)
  83. areas = curs.fetchall()
  84. for x in areas:
  85. if x['parent_id'] in districts_list_map:
  86. districts_list_map[x['parent_id']].append(x) # 将区县添加到对应的城市列表中
  87. area_map[x['area_id']] = x # 构建区域ID到区域信息的映射
  88. # 读取Excel文件中的数据并进行预处理
  89. df = pd.read_excel(io=input_path) # 读取Excel文件
  90. # 获取当前 DataFrame 的列名列表
  91. columns = df.columns.tolist()
  92. # 定义所需的字段列表
  93. required_columns = ['资产所属单位(一级)', '资产所属单位(二级)', '资产所属单位(三级)', '建筑别名', '盘点状态',
  94. '盘点情况', '是否修改', '建筑是否存在待核对信息', '楼层房间是否存在待核对信息', '建筑ID',
  95. '得房率', '上级局址名称', '局址ID', '上级土地名称', '房屋来源', '取得日期', '房龄开始年份',
  96. '投资主体', '管理层级', '房屋结构', '楼层总数', '是否临街', '是否有院落', '整栋是否独有',
  97. '是否有房产证', '无房产证原因', '是否有资产卡片', '资产编号', '资产标签号', '使用状态',
  98. '建筑用途', '权属状态', '建筑占地面积(㎡)', '建筑面积(㎡)', '建筑面积-自用(㎡)',
  99. '建筑面积-出租(㎡)', '建筑面积-闲置(㎡)', '建筑面积-不可使用(㎡)', '使用面积(㎡)',
  100. '使用面积-自用(㎡)', '使用面积-出租(㎡)', '使用面积-闲置(㎡)', '使用面积-不可使用(㎡)', '楼长姓名',
  101. '楼长所在单位', '经度', '纬度', '标准地址', '实际产权人', '是否完成清查']
  102. # 检查是否有缺失的字段
  103. missing_columns = [col for col in required_columns if col not in columns]
  104. # 检查是否有多余的字段
  105. ex_columns = [col for col in columns if col not in required_columns]
  106. # 如果存在缺失字段,则抛出运行时错误并提示缺少哪些字段
  107. if missing_columns or ex_columns:
  108. raise RuntimeError(f"缺少以下字段: {missing_columns};存在以下多余字段:{ex_columns}")
  109. df = df.map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x) # 去除字符串字段中的多余空格
  110. df.drop_duplicates(subset=['建筑ID'], keep='last', inplace=True) # 去重,保留最后一条记录
  111. # 定义函数:根据资产所属单位获取二级组织机构编码
  112. def get_area_no(x):
  113. second_unit = x['资产所属单位(二级)']
  114. third_unit = x['资产所属单位(三级)']
  115. if '河北' == second_unit:
  116. return '-12'
  117. if '长途通信传输局' == second_unit:
  118. return '-11'
  119. if '保定' in second_unit and ('雄县' in third_unit or '容城' in third_unit or '安新' in third_unit):
  120. return '782'
  121. for second_org in second_orgs:
  122. area_name = second_org['name']
  123. area_no = second_org['id']
  124. if area_name in second_unit:
  125. return area_no
  126. raise RuntimeError(f'二级组织机构编码匹配失败: {second_unit}')
  127. df['area_no'] = df.apply(get_area_no, axis=1) # 应用函数,生成二级组织机构编码列
  128. # 定义函数:根据二级组织机构编码获取二级组织机构名称
  129. def get_area_name(x):
  130. area_no = x['area_no']
  131. second_org = org_map[area_no]
  132. area_name = second_org['name']
  133. return area_name
  134. df['area_name'] = df.apply(get_area_name, axis=1) # 应用函数,生成二级组织机构名称列
  135. # 定义函数:根据资产所属单位获取三级组织机构编码
  136. def get_city_no(x):
  137. third_unit = x['资产所属单位(三级)']
  138. area_name = x['area_name']
  139. area_no = x['area_no']
  140. if area_name == '石家庄':
  141. if '矿区' in third_unit:
  142. return 'D0130185'
  143. if '井陉' in third_unit:
  144. return 'D0130121'
  145. if area_name == '秦皇岛':
  146. if '北戴河新区' in third_unit:
  147. return 'D0130185'
  148. if '北戴河' in third_unit:
  149. return 'D0130304'
  150. if area_name == '唐山':
  151. if '滦县' in third_unit:
  152. return 'D0130223'
  153. if '高新技术开发区' in third_unit:
  154. return 'D0130205'
  155. if area_name == '邢台':
  156. if '内丘' in third_unit:
  157. return 'D0130523'
  158. if '任泽' in third_unit:
  159. return 'D0130526'
  160. if area_name == '邯郸':
  161. if '峰峰' in third_unit:
  162. return 'D0130406'
  163. if area_name == '省机动局':
  164. if '沧州' in third_unit:
  165. return 'HECS180'
  166. if '唐山' in third_unit:
  167. return 'HECS181'
  168. if '秦皇岛' in third_unit:
  169. return 'HECS182'
  170. if '廊坊' in third_unit:
  171. return 'HECS183'
  172. if '张家口' in third_unit:
  173. return 'HECS184'
  174. if '邢台' in third_unit:
  175. return 'HECS185'
  176. if '邯郸' in third_unit:
  177. return 'HECS186'
  178. if '保定' in third_unit:
  179. return 'HECS187'
  180. if '石家庄' in third_unit:
  181. return 'HECS188'
  182. if '承德' in third_unit:
  183. return 'HECS189'
  184. if '衡水' in third_unit:
  185. return 'HECS720'
  186. if '雄安' in third_unit:
  187. return 'HECS728'
  188. return 'HECS018'
  189. if '雄安' == area_name:
  190. third_unit = third_unit.replace('雄安新区', '')
  191. third_org_list = third_org_list_map[area_no]
  192. for third_org in third_org_list:
  193. city_name = third_org['name']
  194. if city_name in third_unit:
  195. return third_org['id']
  196. if '沧州' == area_name:
  197. return 'D0130911'
  198. if '唐山' == area_name:
  199. return 'D0130202'
  200. if '秦皇岛' == area_name:
  201. return 'D0130302'
  202. if '廊坊' == area_name:
  203. return 'D0131000'
  204. if '张家口' == area_name:
  205. return 'D0130701'
  206. if '邢台' == area_name:
  207. return 'D0130502'
  208. if '邯郸' == area_name:
  209. return 'D0130402'
  210. if '保定' == area_name:
  211. return 'D0130601'
  212. if '石家庄' == area_name:
  213. return 'D0130186'
  214. if '承德' == area_name:
  215. return 'D0130801'
  216. if '衡水' == area_name:
  217. return 'D0133001'
  218. if '雄安' == area_name:
  219. return 'D0130830'
  220. return 'HE001'
  221. df['city_no'] = df.apply(get_city_no, axis=1) # 应用函数,生成三级组织机构编码列
  222. # 定义函数:根据三级组织机构编码获取三级组织机构名称
  223. def get_city_name(x):
  224. city_no = x['city_no']
  225. third_org = org_map[city_no]
  226. city_name = third_org['name']
  227. return city_name
  228. df['city_name'] = df.apply(get_city_name, axis=1) # 应用函数,生成三级组织机构名称列
  229. # 定义函数:根据标准地址获取城市ID
  230. def get_city_id(x):
  231. address = x['标准地址']
  232. second_unit = x['资产所属单位(二级)']
  233. third_unit = x['资产所属单位(三级)']
  234. if '雄安' in address or ('保定' in address and ('雄县' in address or '容城' in address or '安新' in address)):
  235. return '133100'
  236. for city in cities:
  237. area_name = city['short_name']
  238. area_id = city['area_id']
  239. if area_name in second_unit:
  240. return area_id
  241. if area_name in third_unit:
  242. return area_id
  243. if area_name in address:
  244. return area_id
  245. return ''
  246. df['city_id'] = df.apply(get_city_id, axis=1) # 应用函数,生成城市ID列
  247. # 定义函数:根据城市ID获取城市名称
  248. def get_city(x):
  249. city_id = x['city_id']
  250. area = area_map.get(city_id)
  251. if pd.notna(area):
  252. city = area['area_name']
  253. return city
  254. return ''
  255. df['city'] = df.apply(get_city, axis=1) # 应用函数,生成城市名称列
  256. # 定义函数:根据标准地址获取区县ID
  257. def get_district_id(x):
  258. address = x['标准地址']
  259. city = x['city']
  260. city_id = x['city_id']
  261. if pd.isna(city) or pd.isna(address):
  262. return ''
  263. if city == '石家庄':
  264. if '矿区' in address:
  265. return '130107'
  266. if '井陉' in address:
  267. return '130121'
  268. if city == '唐山':
  269. if '滦县' in address:
  270. return '130284'
  271. if city == '邢台':
  272. if '内邱' in address:
  273. return '130523'
  274. if '任县' in address:
  275. return '130505'
  276. if city == '雄安':
  277. address = address.replace('雄安新区', '')
  278. districts = districts_list_map.get(city_id)
  279. if not districts:
  280. return ''
  281. for district in districts:
  282. district_name = district['short_name']
  283. if district_name in address:
  284. return district['area_id']
  285. return ''
  286. df['district_id'] = df.apply(get_district_id, axis=1) # 应用函数,生成区县ID列
  287. # 定义函数:根据区县ID获取区县名称
  288. def get_district(x):
  289. district_id = x['district_id']
  290. area = area_map.get(district_id)
  291. if pd.notna(area):
  292. district = area['area_name']
  293. return district
  294. return ''
  295. df['district'] = df.apply(get_district, axis=1) # 应用函数,生成区县名称列
  296. # 定义函数:将百分比字符串转换为小数
  297. def convert_percentage_to_number(x):
  298. if pd.notna(x) and isinstance(x, str) and x.endswith('%'):
  299. return Decimal(x[:-1]) / Decimal('100')
  300. return x
  301. df['得房率'] = df['得房率'].apply(convert_percentage_to_number) # 应用函数,将得房率转换为小数
  302. df['year_no'] = start_date.year # 年份列
  303. df['month_no'] = start_date.month # 月份列
  304. def get_int(x):
  305. try:
  306. return int(x)
  307. except Exception:
  308. return ""
  309. df['房龄开始年份'] = df['房龄开始年份'].apply(get_int)
  310. # 定义函数:计算房龄
  311. def get_house_age(x):
  312. house_year_began = x['房龄开始年份']
  313. if pd.notna(house_year_began) and house_year_began:
  314. current_year = start_date.year
  315. return current_year - house_year_began
  316. return ''
  317. df['house_age'] = df.apply(get_house_age, axis=1) # 应用函数,生成房龄列
  318. df.insert(0, 'year_month', year_month) # 在数据框第一列插入年月列
  319. df.rename(
  320. columns={'资产所属单位(一级)': 'first_unit', '资产所属单位(二级)': 'second_unit', '资产所属单位(三级)': 'third_unit',
  321. '建筑别名': 'building_name', '盘点状态': 'inventory_status', '盘点情况': 'inventory_situation',
  322. '是否修改': 'modify', '建筑是否存在待核对信息': 'building_to_be_verified',
  323. '楼层房间是否存在待核对信息': 'floor_room_to_be_verified', '建筑ID': 'building_id',
  324. '得房率': 'housing_acquisition_rate', '上级局址名称': 'site_name', '局址ID': 'site_id',
  325. '上级土地名称': 'land_name', '房屋来源': 'housing_source', '取得日期': 'acquisition_date',
  326. '房龄开始年份': 'house_year_began', '投资主体': 'investor', '管理层级': 'management_level',
  327. '房屋结构': 'building_structure', '楼层总数': 'total_floors', '是否临街': 'frontage',
  328. '是否有院落': 'courtyard', '整栋是否独有': 'whole_building',
  329. '是否有房产证': 'property_ownership_certificate',
  330. '无房产证原因': 'no_property_ownership_certificate_reason', '是否有资产卡片': 'has_asset_card',
  331. '资产编号': 'assets_num', '资产标签号': 'assets_tag_num', '使用状态': 'usage_status',
  332. '建筑用途': 'building_use', '权属状态': 'ownership_status', '建筑占地面积(㎡)': 'floor_area',
  333. '建筑面积(㎡)': 'building_area', '建筑面积-自用(㎡)': 'building_area_self_use',
  334. '建筑面积-出租(㎡)': 'building_area_rent', '建筑面积-闲置(㎡)': 'building_area_idle',
  335. '建筑面积-不可使用(㎡)': 'building_area_unusable', '使用面积(㎡)': 'usable_area',
  336. '使用面积-自用(㎡)': 'usable_area_self_use', '使用面积-出租(㎡)': 'usable_area_rent',
  337. '使用面积-闲置(㎡)': 'usable_area_idle', '使用面积-不可使用(㎡)': 'usable_area_unusable',
  338. '楼长姓名': 'community_assistant_name', '楼长所在单位': 'community_assistant_unit', '经度': 'lng_jt',
  339. '纬度': 'lat_jt', '标准地址': 'address', '实际产权人': 'property_owner', '是否完成清查': 'checked'}, inplace=True)
  340. df = df[['year_month', 'first_unit', 'second_unit', 'third_unit', 'building_name', 'inventory_status',
  341. 'inventory_situation', 'modify', 'building_to_be_verified', 'floor_room_to_be_verified', 'building_id',
  342. 'housing_acquisition_rate', 'site_name', 'site_id', 'land_name', 'housing_source', 'acquisition_date',
  343. 'house_year_began', 'investor', 'management_level', 'building_structure', 'total_floors', 'frontage',
  344. 'courtyard', 'whole_building', 'property_ownership_certificate',
  345. 'no_property_ownership_certificate_reason', 'has_asset_card', 'assets_num', 'assets_tag_num',
  346. 'usage_status', 'building_use', 'ownership_status', 'floor_area', 'building_area',
  347. 'building_area_self_use', 'building_area_rent', 'building_area_idle', 'building_area_unusable',
  348. 'usable_area', 'usable_area_self_use', 'usable_area_rent', 'usable_area_idle', 'usable_area_unusable',
  349. 'community_assistant_name', 'community_assistant_unit', 'lng_jt', 'lat_jt', 'address',
  350. 'property_owner', 'checked', 'area_no', 'area_name', 'city_no', 'city_name', 'city_id', 'city',
  351. 'district_id', 'district', 'year_no', 'month_no', 'house_age']]
  352. # 打印数据框信息
  353. df.info()
  354. # 将结果保存为CSV文件
  355. df.to_csv(path_or_buf=output_path, index=False, encoding='utf-8-sig', lineterminator='\n')
  356. def data_import():
  357. # 定义 PowerShell 脚本的路径
  358. script_path = r"../../copy.ps1"
  359. # 目标表和文件信息
  360. table = "house.building_month" # 数据库目标表名
  361. # 表字段列名,用于指定导入数据的列顺序
  362. columns = "year_month,first_unit,second_unit,third_unit,building_name,inventory_status,inventory_situation,modify,building_to_be_verified,floor_room_to_be_verified,building_id,housing_acquisition_rate,site_name,site_id,land_name,housing_source,acquisition_date,house_year_began,investor,management_level,building_structure,total_floors,frontage,courtyard,whole_building,property_ownership_certificate,no_property_ownership_certificate_reason,has_asset_card,assets_num,assets_tag_num,usage_status,building_use,ownership_status,floor_area,building_area,building_area_self_use,building_area_rent,building_area_idle,building_area_unusable,usable_area,usable_area_self_use,usable_area_rent,usable_area_idle,usable_area_unusable,community_assistant_name,community_assistant_unit,lng_jt,lat_jt,address,property_owner,checked,area_no,area_name,city_no,city_name,city_id,city,district_id,district,year_no,month_no,house_age"
  363. # 构造执行 PowerShell 脚本的命令
  364. command = f"powershell -NoProfile -NonInteractive -File {script_path} -db_host {db_host} -db_port {db_port} -db_username {db_username} -db_password {db_password} -dbname {dbname} -table {table} -filename {output_path} -columns {columns}"
  365. # 打印生成的命令,方便调试和日志记录
  366. logger.info("command: {}", command)
  367. # 使用 subprocess 模块运行 PowerShell 命令,并捕获输出
  368. completed_process = subprocess.run(
  369. command, # 执行的命令
  370. check=False, # 如果命令执行失败,不抛出异常
  371. text=True, # 将输出作为字符串处理
  372. capture_output=True, # 捕获标准输出和标准错误
  373. )
  374. # 打印命令执行的结果,包括返回码、标准输出和标准错误
  375. logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
  376. completed_process.stderr)
  377. # 定义正则表达式,用于匹配标准输出中的 COPY 结果
  378. p = re.compile(r"^(COPY) (\d+)$")
  379. count = None # 初始化计数变量
  380. matcher = p.match(completed_process.stdout) # 匹配标准输出中的 COPY 结果
  381. if matcher:
  382. count = int(matcher.group(2)) # 提取导入的数据行数
  383. # 如果没有成功提取到导入数据的行数,抛出运行时异常
  384. if count is None:
  385. raise RuntimeError("导入数据失败")
  386. def upload_file():
  387. remote_path = f'{remote_dir_path}{year_month}.xlsx' # 定义远程主机的目标文件路径
  388. # 使用paramiko.SSHClient创建一个SSH客户端对象,并通过with语句管理其上下文
  389. with paramiko.SSHClient() as ssh:
  390. # 设置自动添加主机密钥策略,避免因未知主机密钥导致连接失败
  391. ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
  392. # 连接到远程主机,传入主机地址、端口、用户名和密码
  393. ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
  394. # 执行远程命令,创建远程目录(如果不存在)
  395. ssh.exec_command(f'mkdir -p {remote_dir_path}')
  396. # 打开SFTP会话,用于文件传输,并通过with语句管理其上下文
  397. with ssh.open_sftp() as sftp:
  398. # 记录日志,提示即将上传的本地文件和远程目标路径
  399. logger.info("upload {} to {}", input_path, remote_path)
  400. # 使用SFTP的put方法将本地文件上传到远程主机
  401. sftp.put(input_path, remote_path)
  402. # 记录日志,提示文件已成功上传
  403. logger.info("uploaded {}", input_path)
  404. def data_update():
  405. with psycopg.connect(
  406. conninfo=conn_info,
  407. ) as conn:
  408. with conn.cursor() as curs:
  409. # 更新局址信息
  410. sql = f"""
  411. update
  412. house.building_month a
  413. set
  414. site_num = b.site_num,
  415. city_level = b.city_level,
  416. city_region = b.city_region,
  417. area_sector = b.area_sector,
  418. has_land = b.has_land
  419. from
  420. house.site_month b
  421. where
  422. a.site_id = b.site_id
  423. and a.year_month = b.year_month
  424. and a.year_month = {year_month}
  425. """
  426. logger.info(f"sql: {sql}")
  427. curs.execute(sql)
  428. logger.info(f"update {curs.rowcount}")
  429. # 更新经纬度
  430. sql = f"""
  431. with
  432. t101 as (
  433. select
  434. *
  435. from
  436. house.building_month
  437. where
  438. year_month = 202312
  439. )
  440. update
  441. house.building_month a
  442. set
  443. lng_wgs84 = b.lng_wgs84,
  444. lat_wgs84 = b.lat_wgs84,
  445. lng_bd09 = b.lng_bd09,
  446. lat_bd09 = b.lat_bd09,
  447. building_img = b.building_img
  448. from
  449. t101 b
  450. where
  451. a.year_month = {year_month}
  452. and a.building_id = b.building_id
  453. """
  454. logger.info(f"sql: {sql}")
  455. curs.execute(sql)
  456. logger.info(f"update {curs.rowcount}")
  457. # 更新闲置建筑面积超过1000平米策略
  458. sql = f"""
  459. insert
  460. into
  461. house.building_idle_strategy
  462. (
  463. year_month,
  464. building_id,
  465. first_unit,
  466. second_unit,
  467. third_unit,
  468. site_num,
  469. site_name,
  470. address,
  471. city_level,
  472. city_region,
  473. area_sector,
  474. has_land,
  475. site_id,
  476. building_name,
  477. housing_acquisition_rate,
  478. housing_source,
  479. acquisition_date,
  480. house_year_began,
  481. investor,
  482. management_level,
  483. building_structure,
  484. total_floors,
  485. assets_num,
  486. assets_tag_num,
  487. usage_status,
  488. building_use,
  489. ownership_status,
  490. floor_area,
  491. building_area,
  492. building_area_self_use,
  493. building_area_rent,
  494. building_area_idle,
  495. building_area_unusable,
  496. usable_area,
  497. usable_area_self_use,
  498. usable_area_rent,
  499. usable_area_idle,
  500. usable_area_unusable,
  501. city,
  502. district,
  503. lng_wgs84,
  504. lat_wgs84,
  505. lng_bd09,
  506. lat_bd09,
  507. building_img,
  508. area_no,
  509. area_name,
  510. city_no,
  511. city_name,
  512. year_no,
  513. month_no,
  514. house_age,
  515. land_name,
  516. frontage,
  517. courtyard,
  518. whole_building,
  519. property_ownership_certificate,
  520. no_property_ownership_certificate_reason,
  521. unrelated_assets,
  522. community_assistant_name,
  523. community_assistant_unit,
  524. lng_jt,
  525. lat_jt,
  526. property_owner,
  527. checked,
  528. city_id,
  529. district_id
  530. )
  531. select
  532. year_month,
  533. building_id,
  534. first_unit,
  535. second_unit,
  536. third_unit,
  537. site_num,
  538. site_name,
  539. address,
  540. city_level,
  541. city_region,
  542. area_sector,
  543. has_land,
  544. site_id,
  545. building_name,
  546. housing_acquisition_rate,
  547. housing_source,
  548. acquisition_date,
  549. house_year_began,
  550. investor,
  551. management_level,
  552. building_structure,
  553. total_floors,
  554. assets_num,
  555. assets_tag_num,
  556. usage_status,
  557. building_use,
  558. ownership_status,
  559. floor_area,
  560. building_area,
  561. building_area_self_use,
  562. building_area_rent,
  563. building_area_idle,
  564. building_area_unusable,
  565. usable_area,
  566. usable_area_self_use,
  567. usable_area_rent,
  568. usable_area_idle,
  569. usable_area_unusable,
  570. city,
  571. district,
  572. lng_wgs84,
  573. lat_wgs84,
  574. lng_bd09,
  575. lat_bd09,
  576. building_img,
  577. area_no,
  578. area_name,
  579. city_no,
  580. city_name,
  581. year_no,
  582. month_no,
  583. house_age,
  584. land_name,
  585. frontage,
  586. courtyard,
  587. whole_building,
  588. property_ownership_certificate,
  589. no_property_ownership_certificate_reason,
  590. unrelated_assets,
  591. community_assistant_name,
  592. community_assistant_unit,
  593. lng_jt,
  594. lat_jt,
  595. property_owner,
  596. checked,
  597. city_id,
  598. district_id
  599. from
  600. house.building_month
  601. where
  602. building_area_idle > 1000
  603. and year_month = {year_month}
  604. order by
  605. building_area_idle desc
  606. """
  607. logger.info(f"sql: {sql}")
  608. curs.execute(sql)
  609. logger.info(f"update {curs.rowcount}")
  610. sql = f"""
  611. with
  612. t101 as (
  613. select
  614. *,
  615. row_number() over (
  616. order by building_area_idle desc) as sort
  617. from
  618. house.building_idle_strategy
  619. where
  620. year_month = {year_month}
  621. ),
  622. t201 as (
  623. select
  624. area_no,
  625. area_name,
  626. city_no,
  627. city_name,
  628. 'kpi_301320_155_01' as kpi_code,
  629. '闲置建筑面积' as kpi_name,
  630. round(building_area_idle, 2)::varchar as kpi_value,
  631. '1' as kpi_type,
  632. building_id as jk_object_no,
  633. building_name as jk_object,
  634. sort
  635. from
  636. t101
  637. ),
  638. t202 as (
  639. select
  640. area_no,
  641. area_name,
  642. city_no,
  643. city_name,
  644. 'kpi_301320_155_02' as kpi_code,
  645. '房产名称' as kpi_name,
  646. building_name as kpi_value,
  647. '0' as kpi_type,
  648. building_id as jk_object_no,
  649. building_name as jk_object,
  650. sort
  651. from
  652. t101
  653. ),
  654. t203 as (
  655. select
  656. area_no,
  657. area_name,
  658. city_no,
  659. city_name,
  660. 'kpi_301320_155_03' as kpi_code,
  661. '房产编号' as kpi_name,
  662. building_id as kpi_value,
  663. '0' as kpi_type,
  664. building_id as jk_object_no,
  665. building_name as jk_object,
  666. sort
  667. from
  668. t101
  669. ),
  670. t204 as (
  671. select
  672. area_no,
  673. area_name,
  674. city_no,
  675. city_name,
  676. 'kpi_301320_155_04' as kpi_code,
  677. '房产总建筑面积' as kpi_name,
  678. round(building_area, 2)::varchar as kpi_value,
  679. '0' as kpi_type,
  680. building_id as jk_object_no,
  681. building_name as jk_object,
  682. sort
  683. from
  684. t101
  685. ),
  686. t301 as (
  687. select
  688. *
  689. from
  690. t201
  691. union all
  692. select
  693. *
  694. from
  695. t202
  696. union all
  697. select
  698. *
  699. from
  700. t203
  701. union all
  702. select
  703. *
  704. from
  705. t204
  706. )
  707. insert
  708. into
  709. publish.house_building_idle_strategy
  710. (
  711. acct_date,
  712. dept_code,
  713. dept_name,
  714. strategy_code,
  715. area_no,
  716. area_name,
  717. city_no,
  718. city_name,
  719. sale_no,
  720. sale_name,
  721. jk_object_no,
  722. jk_object,
  723. kpi_code,
  724. kpi_name,
  725. kpi_value,
  726. kpi_type,
  727. sort
  728. )
  729. select
  730. {year_month} as acct_date,
  731. '301320' as dept_code,
  732. '河北省分公司纵横运营中心' as dept_name,
  733. '301320_155' as strategy_code,
  734. area_no,
  735. area_name,
  736. city_no,
  737. city_name,
  738. '' as sale_no,
  739. '' as sale_name,
  740. jk_object_no,
  741. jk_object,
  742. kpi_code,
  743. kpi_name,
  744. kpi_value,
  745. kpi_type,
  746. sort
  747. from
  748. t301
  749. order by
  750. sort,
  751. kpi_code
  752. """
  753. logger.info(f"sql: {sql}")
  754. curs.execute(sql)
  755. logger.info(f"update {curs.rowcount}")
  756. data_process()
  757. data_import()
  758. upload_file()
  759. data_update()