weijianghai há 1 mês atrás
commit
ea1a59d7cb

+ 1652 - 0
.gitignore

@@ -0,0 +1,1652 @@
+### Intellij template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### VisualStudioCode template
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+### Xcode template
+## User settings
+xcuserdata/
+
+## Xcode 8 and earlier
+*.xcscmblueprint
+*.xccheckout
+
+### Intellij+all template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### Eclipse template
+.metadata
+bin/
+tmp/
+*.tmp
+*.bak
+*.swp
+*~.nib
+local.properties
+.settings/
+.loadpath
+.recommenders
+
+# External tool builders
+.externalToolBuilders/
+
+# Locally stored "Eclipse launch configurations"
+*.launch
+
+# PyDev specific (Python IDE for Eclipse)
+*.pydevproject
+
+# CDT-specific (C/C++ Development Tooling)
+.cproject
+
+# CDT- autotools
+.autotools
+
+# Java annotation processor (APT)
+.factorypath
+
+# PDT-specific (PHP Development Tools)
+.buildpath
+
+# sbteclipse plugin
+.target
+
+# Tern plugin
+.tern-project
+
+# TeXlipse plugin
+.texlipse
+
+# STS (Spring Tool Suite)
+.springBeans
+
+# Code Recommenders
+.recommenders/
+
+# Annotation Processing
+.apt_generated/
+.apt_generated_test/
+
+# Scala IDE specific (Scala & Java development for Eclipse)
+.cache-main
+.scala_dependencies
+.worksheet
+
+# Uncomment this line if you wish to ignore the project description file.
+# Typically, this file would be tracked if it contains build/dependency configurations:
+#.project
+
+### JetBrains+iml template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### JetBrains+all template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### Example user template template
+### Example user template
+
+# IntelliJ project files
+.idea
+*.iml
+out
+gen
+### VisualStudio template
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
+
+# User-specific files
+*.rsuser
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Mono auto generated files
+mono_crash.*
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+[Ww][Ii][Nn]32/
+[Aa][Rr][Mm]/
+[Aa][Rr][Mm]64/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+[Ll]ogs/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUnit
+*.VisualState.xml
+TestResult.xml
+nunit-*.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# ASP.NET Scaffolding
+ScaffoldingReadMe.txt
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_h.h
+*.ilk
+*.meta
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp_proj
+*_wpftmp.csproj
+*.log
+*.tlog
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Coverlet is a free, cross platform Code Coverage Tool
+coverage*.json
+coverage*.xml
+coverage*.info
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# NuGet Symbol Packages
+*.snupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+*.appxbundle
+*.appxupload
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!?*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+*- [Bb]ackup.rdl
+*- [Bb]ackup ([0-9]).rdl
+*- [Bb]ackup ([0-9][0-9]).rdl
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio 6 auto-generated project file (contains which files were open etc.)
+*.vbp
+
+# Visual Studio 6 workspace and project file (working project files containing files to include in project)
+*.dsw
+*.dsp
+
+# Visual Studio 6 technical files
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# CodeRush personal settings
+.cr/personal
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+
+# Local History for Visual Studio
+.localhistory/
+
+# Visual Studio History (VSHistory) files
+.vshistory/
+
+# BeatPulse healthcheck temp database
+healthchecksdb
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+MigrationBackup/
+
+# Ionide (cross platform F# VS Code tools) working folder
+.ionide/
+
+# Fody - auto-generated XML schema
+FodyWeavers.xsd
+
+# VS Code files for those working on multiple tools
+*.code-workspace
+
+# Local History for Visual Studio Code
+
+# Windows Installer files from build outputs
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# JetBrains Rider
+*.sln.iml
+
+### macOS template
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### Python template
+# Byte-compiled / optimized / DLL files
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Intellij+iml template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### Intellij template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### VisualStudioCode template
+
+# Local History for Visual Studio Code
+
+# Built Visual Studio Code Extensions
+
+### Xcode template
+## User settings
+
+### Intellij+all template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### XcodeInjection template
+# Code Injection
+#
+# After new code Injection tools there's a generated folder /iOSInjectionProject
+# https://github.com/johnno1962/injectionforxcode
+
+iOSInjectionProject/
+
+### Windows template
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+
+# Windows shortcuts
+*.lnk
+
+### JetBrains+iml template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### SublimeText template
+# Cache files for Sublime Text
+*.tmlanguage.cache
+*.tmPreferences.cache
+*.stTheme.cache
+
+# Workspace files are user-specific
+*.sublime-workspace
+
+# Project files should be checked into the repository, unless a significant
+# proportion of contributors will probably not be using Sublime Text
+# *.sublime-project
+
+# SFTP configuration file
+sftp-config.json
+sftp-config-alt*.json
+
+# Package control specific files
+Package Control.last-run
+Package Control.ca-list
+Package Control.ca-bundle
+Package Control.system-ca-bundle
+Package Control.cache/
+Package Control.ca-certs/
+Package Control.merged-ca-bundle
+Package Control.user-ca-bundle
+oscrypto-ca-bundle.crt
+bh_unicode_properties.cache
+
+# Sublime-github package stores a github token in this file
+# https://packagecontrol.io/packages/sublime-github
+GitHub.sublime-settings
+
+### JetBrains+all template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### Spreadsheet template
+*.xlr
+*.xls
+*.xlsx
+
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### Example user template template
+### Example user template
+
+# IntelliJ project files
+### VisualStudio template
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
+
+# User-specific files
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+
+# Mono auto generated files
+
+# Build results
+
+# Visual Studio 2015/2017 cache/options directory
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+
+# MSTest test Results
+
+# NUnit
+
+# Build Results of an ATL Project
+
+# Benchmark Results
+
+# .NET Core
+
+# ASP.NET Scaffolding
+
+# StyleCop
+
+# Files built by Visual Studio
+
+# Chutzpah Test files
+
+# Visual C++ cache files
+
+# Visual Studio profiler
+
+# Visual Studio Trace Files
+
+# TFS 2012 Local Workspace
+
+# Guidance Automation Toolkit
+
+# ReSharper is a .NET coding add-in
+
+# TeamCity is a build add-in
+
+# DotCover is a Code Coverage Tool
+
+# AxoCover is a Code Coverage Tool
+
+# Coverlet is a free, cross platform Code Coverage Tool
+
+# Visual Studio code coverage results
+
+# NCrunch
+
+# MightyMoose
+
+# Web workbench (sass)
+
+# Installshield output folder
+
+# DocProject is a documentation generator add-in
+
+# Click-Once directory
+
+# Publish Web Output
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+
+# NuGet Packages
+# NuGet Symbol Packages
+# The packages folder can be ignored because of Package Restore
+# except build/, which is used as an MSBuild target.
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+
+# Microsoft Azure Build Output
+
+# Microsoft Azure Emulator
+
+# Windows Store app package directories and files
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+# but keep track of directories ending in .cache
+
+# Others
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+
+# SQL Server files
+
+# Business Intelligence projects
+
+# Microsoft Fakes
+
+# GhostDoc plugin setting file
+
+# Node.js Tools for Visual Studio
+
+# Visual Studio 6 build log
+
+# Visual Studio 6 workspace options file
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+
+# Visual Studio 6 auto-generated project file (contains which files were open etc.)
+
+# Visual Studio 6 workspace and project file (working project files containing files to include in project)
+
+# Visual Studio 6 technical files
+
+# Visual Studio LightSwitch build output
+
+# Paket dependency manager
+
+# FAKE - F# Make
+
+# CodeRush personal settings
+
+# Python Tools for Visual Studio (PTVS)
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+
+# Telerik's JustMock configuration file
+
+# BizTalk build output
+
+# OpenCover UI analysis results
+
+# Azure Stream Analytics local run output
+
+# MSBuild Binary and Structured Log
+
+# NVidia Nsight GPU debugger configuration file
+
+# MFractors (Xamarin productivity tool) working folder
+
+# Local History for Visual Studio
+
+# Visual Studio History (VSHistory) files
+
+# BeatPulse healthcheck temp database
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+
+# Ionide (cross platform F# VS Code tools) working folder
+
+# Fody - auto-generated XML schema
+
+# VS Code files for those working on multiple tools
+
+# Local History for Visual Studio Code
+
+# Windows Installer files from build outputs
+
+# JetBrains Rider
+
+### macOS template
+# General
+
+# Icon must end with two \r
+
+# Thumbnails
+
+# Files that might appear in the root of a volume
+
+# Directories potentially created on remote AFP share
+
+### Intellij+iml template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+
+# AWS User-specific
+
+# Generated files
+
+# Sensitive or high-churn files
+
+# Gradle
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+
+# Mongo Explorer plugin
+
+# File-based project format
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Cursive Clojure plugin
+
+# SonarLint plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+# Editor-based Rest Client
+
+# Android studio 3.1+ serialized cache file
+
+### Python template
+# Byte-compiled / optimized / DLL files
+
+# C extensions
+
+# Distribution / packaging
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+
+# Installer logs
+
+# Unit test / coverage reports
+
+# Translations
+
+# Django stuff:
+
+# Flask stuff:
+
+# Scrapy stuff:
+
+# Sphinx documentation
+
+# PyBuilder
+
+# Jupyter Notebook
+
+# IPython
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+
+# Celery stuff
+
+# SageMath parsed files
+
+# Environments
+
+# Spyder project settings
+
+# Rope project settings
+
+# mkdocs documentation
+
+# mypy
+
+# Pyre type checker
+
+# pytype static type analyzer
+
+# Cython debug symbols
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+*.csv

+ 698 - 0
car/car-chao-bao/car_chao-bao.py

@@ -0,0 +1,698 @@
+"""车辆超保数据处理
+"""
+
+import re
+import subprocess
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from loguru import logger
+import pandas as pd
+import psycopg
+import paramiko
+
+# Configure logging: append log output to file a.log
+logger.add(sink='a.log')
+ssh_hostname = '172.16.107.4'  # remote host address
+ssh_port = 22  # SSH service port
+ssh_username = 'app'  # SSH login user
+ssh_password = '(l4w0ST_'  # SSH login password  # NOTE(review): hardcoded credential — consider env var / secret store
+# Remote directory that receives the archived source workbook
+remote_dir_path = '/data/history/car/chao-bao/'
+# Database connection settings
+db_host = "172.16.107.5"  # database host
+db_port = 5432         # database port
+db_username = "finance"  # database user
+db_password = "Finance@unicom23"  # database password  # NOTE(review): hardcoded credential
+dbname = "financialdb"       # database name
+conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
+# Current date, and from it the first day of the previous month (the billing period)
+today = datetime.today()
+start_date = today - relativedelta(months=1, day=1)
+year_month = start_date.strftime('%Y%m')
+# Input data file path (Excel workbook)
+input_path = 'data.xlsx'
+# Output file path (CSV produced for bulk COPY import)
+output_path = 'output.csv'
+
+
+def data_process():
+    """Clean the raw vehicle over-maintenance workbook and write a normalized CSV.
+
+    Loads organization/area lookup tables from PostgreSQL, normalizes plate
+    numbers and unit names in the Excel input, and writes `output_path`
+    (utf-8-sig CSV) ready for bulk COPY import.
+    """
+    # Matches any plate province abbreviation (京, 津, 晋, ...)
+    has_che_pai_province_pattern = re.compile(
+        "[" + re.escape("京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁国防") + "]")
+
+    # Matches characters that can never appear in a plate; used to strip noise
+    not_che_pai_pattern = re.compile(
+        "[^京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁新港澳学挂领试超练警国防A-Z\\d]")
+
+    # Matches a complete, well-formed plate number (new-energy or regular format)
+    che_pai_pattern = re.compile(
+        r"([京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z]"
+        r"(([DF]((?![IO])[A-Z0-9](?![IO]))\d{4})|(\d{5}[DF]))|"
+        r"[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z][A-Z0-9]{4}[A-Z0-9挂学警港澳])"
+    )
+
+    # Prefecture-level city -> district/county short names under it
+    er_ji_map = {
+        "石家庄": ["鹿泉", "藁城", "栾城", "井陉矿区", "井陉", "无极", "正定", "元氏", "新乐", "晋州", "平山", "灵寿",
+                   "赞皇", "赵县", "行唐", "高邑", "辛集", "深泽"],
+        "唐山": ["唐山高开区", "迁西", "海港", "开平", "丰南", "滦县", "乐亭", "丰润", "玉田", "古冶", "曹妃甸", "遵化",
+                 "滦南", "迁安"],
+        "秦皇岛": ["北戴河新区", "北戴河", "山海关", "昌黎", "卢龙", "青龙", "抚宁"],
+        "邯郸": ["曲周", "魏县", "馆陶", "磁县", "大名", "鸡泽", "成安", "涉县", "永年", "武安", "峰峰", "广平", "临漳",
+                 "邱县", "肥乡"],
+        "邢台": ["新河", "南宫", "隆尧", "内邱", "平乡", "宁晋", "广宗", "清河", "临西", "任县", "巨鹿", "沙河", "威县",
+                 "临城", "柏乡", "南和"],
+        "保定": ["涞水", "蠡县", "顺平", "博野", "安国", "涞源", "唐县", "定州", "高阳", "曲阳", "阜平", "清苑",
+                 "高碑店",
+                 "满城", "涿州", "易县", "望都", "徐水", "定兴", "白沟"],
+        "张家口": ["张北", "崇礼", "康保", "赤城", "阳原", "万全", "下花园", "尚义", "怀安", "怀来", "蔚县", "涿鹿",
+                   "沽源",
+                   "宣化"],
+        "承德": ["承德县", "兴隆", "宽城", "平泉", "营子", "隆化", "滦平", "围场", "丰宁", "双滦"],
+        "廊坊": ["文安", "霸州", "大城", "廊坊开发区", "三河", "香河", "永清", "胜芳", "燕郊", "固安", "大厂"],
+        "沧州": ["东光", "吴桥", "黄骅", "盐山", "孟村", "泊头", "献县", "南皮", "渤海新区", "海兴", "沧县", "河间",
+                 "青县",
+                 "任丘", "肃宁"],
+        "衡水": ["景县", "阜城", "枣强", "深州", "饶阳", "故城", "武强", "武邑", "冀州", "安平"],
+        "雄安": ["容城", "雄县", "安新"]
+    }
+
+    # Lookup tables populated from the database below
+    org_map = {}              # org id -> org row (grades 1 and 2)
+    third_org_map = {}        # grade-2 org id -> org row
+    third_org_list_map = {}   # grade-1 org id -> list of child grade-2 org rows
+    area_map = {}             # area_id -> area row (grades 1 and 2)
+    district_list_map = {}    # city area_id -> list of child district rows
+
+    # Connect to PostgreSQL and load the organization / area lookup tables
+    with psycopg.connect(
+            conninfo=conn_info,
+            row_factory=psycopg.rows.dict_row
+    ) as conn:
+        with conn.cursor() as curs:
+            # Grade-1 organizations, ordered by order_num
+            sql = """
+                select * from common.organization where grade = 1 order by order_num
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            second_orgs = curs.fetchall()
+
+            # Index grade-1 orgs by id and prepare their child lists
+            for x in second_orgs:
+                org_map[x['id']] = x
+                third_org_list_map[x['id']] = []
+
+            # Grade-2 organizations, ordered by parent then order_num
+            sql = """
+                select * from common.organization where grade = 2 order by parent_id, order_num
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            third_orgs = curs.fetchall()
+
+            # Index grade-2 orgs and attach each to its parent's child list
+            for x in third_orgs:
+                org_map[x['id']] = x
+                third_org_list_map[x['parent_id']].append(x)
+                third_org_map[x['id']] = x
+
+            # Grade-1 administrative areas (cities), ordered by area_id
+            sql = """
+                select * from common.area where area_grade = 1 order by area_id
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            cities = curs.fetchall()
+
+            # Index cities by area_id
+            for city in cities:
+                area_map[city['area_id']] = city
+
+            # Grade-2 administrative areas (districts), ordered by parent then area_id
+            sql = """
+                select * from common.area where area_grade = 2 order by parent_id, area_id
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            districts = curs.fetchall()
+
+            # Index districts by area_id (district_list_map is built just below)
+            for district in districts:
+                area_map[district['area_id']] = district
+
+            # Map each city to the list of its districts
+            for city in cities:
+                district_list_map[city['area_id']] = []
+                for district in districts:
+                    if city['area_id'] == district['parent_id']:
+                        district_list_map[city['area_id']].append(district)
+
+    # Load the raw workbook
+    df = pd.read_excel(io=input_path)
+    # Columns to whitespace-clean; the two date columns are left untouched
+    columns_to_clean = list(filter(lambda x: x not in ('登记日期', '进厂时间'), df.columns))
+    # Remove all whitespace inside string cells
+    df[columns_to_clean] = df[columns_to_clean].map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)
+    df['账期'] = year_month
+    # Preserve the raw unit / plate values before normalization
+    df['原始一级单位'] = df['一级单位']
+    df['原始二级单位'] = df['二级单位']
+    df['原始三级单位'] = df['三级单位']
+    df['原始车牌号'] = df['车牌号']
+
+    # Extract and normalize a plate number from free-form text
+    def get_che_pai(che_pai):
+        # Empty/NaN input -> empty string
+        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
+            return ""
+        # Uppercase before matching
+        upper_case = che_pai.upper()
+        # Strip characters that can never occur in a plate
+        s = not_che_pai_pattern.sub("", upper_case)
+        # Try to find a well-formed plate
+        m = che_pai_pattern.search(s)
+        if m:
+            return m.group(0)
+
+        # Contains a province marker but no full plate matched: log, return stripped text
+        if has_che_pai_province_pattern.search(che_pai):
+            logger.warning(f"车牌匹配失败: {che_pai} -> {s}")
+            return s
+
+        # No plate found at all: log, return the uppercased original
+        logger.warning(f"车牌匹配失败: {che_pai} -> {upper_case}")
+        return upper_case
+
+    # Normalize the plate column
+    df['车牌号'] = df['车牌号'].apply(get_che_pai)
+    # Drop duplicate plates, keeping the last occurrence
+    df.drop_duplicates(subset=['车牌号'], keep='last', inplace=True)
+
+    # Flag ("1"/"0") whether a plate still fails to parse as a well-formed number
+    def che_pai_fail(che_pai):
+        # Empty/NaN input counts as a failure
+        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
+            return "1"
+
+        # Strip invalid characters, then look for a full plate
+        s = not_che_pai_pattern.sub("", che_pai.upper())
+        # Search for a complete plate in the cleaned text
+        m = che_pai_pattern.search(s)
+        if m:
+            return "0"  # parse succeeded
+
+        return "1"  # parse failed
+
+    # Mark rows whose (already normalized) plate still fails to parse
+    df['车牌匹配失败'] = df['车牌号'].apply(che_pai_fail)
+
+    # Derive the normalized first-level unit from the raw first-level unit name
+    def get_first_unit(unit):
+        # Empty/NaN input -> empty string
+        if pd.isna(unit) or not unit or not unit.strip():
+            return ""
+        # Keyword-based special units
+        if "机动通信局" in unit or "机动局" in unit or "传输局" in unit or "线路维护中心" in unit:
+            return "机动局"
+        if "雄安基地建设部" in unit:
+            return "雄安基地建设部"
+        if "华北基地建设部" in unit:
+            return "华北基地建设部"
+        # Otherwise match a known city name from er_ji_map
+        for yj in er_ji_map.keys():
+            if yj in unit:
+                return yj
+        return "省公司本部"  # fallback: provincial headquarters
+
+    # Populate the normalized first-level unit column
+    df['一级单位'] = df['原始一级单位'].apply(get_first_unit)
+
+    # Derive the second-level unit from the first-level unit and raw second-level name
+    def get_second_unit(x):
+        # Read the two inputs; missing values become ""
+        first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
+        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        # Empty second-level name -> fall back to the first-level unit
+        if not unit or not unit.strip():
+            return first_unit
+        # Provincial HQ has no finer breakdown
+        if first_unit == "省公司本部":
+            return first_unit
+        # Mobile bureau: qualify with the city name when one is present
+        if first_unit == "机动局":
+            for yj in er_ji_map.keys():
+                if yj in unit:
+                    return f"机动局{yj}"
+            return "机动局本部"
+        # City-specific special cases and renamed districts
+        if first_unit == "石家庄":
+            if "开发区" in unit:
+                return "石家庄开发区"
+        if first_unit == "廊坊":
+            if "开发区" in unit:
+                return "廊坊开发区"
+        if first_unit == "邢台":
+            if "内丘" in unit:
+                return "内邱"
+            if "任泽" in unit:
+                return "任县"
+        if first_unit == "唐山":
+            if "高开区" in unit:
+                return "唐山高开区"
+            if "滦州" in unit:
+                return "滦县"
+        # Generic match against the city's district list
+        ejs = er_ji_map.get(first_unit)
+        if not ejs:
+            return first_unit
+        if first_unit == "雄安":
+            unit = unit.replace("雄安新区", "")
+        for ej in ejs:
+            if ej in unit:
+                return ej
+        return f"{first_unit}本部"  # fallback: "<city> headquarters"
+
+    # Populate the normalized second-level unit column
+    df['二级单位'] = df.apply(get_second_unit, axis=1)
+
+    # Derive the third-level unit from the raw third-level name
+    def get_third_unit(x):
+        # Read the two inputs; missing values become ""
+        second_unit = str(x['二级单位']) if pd.notna(x['二级单位']) else ""
+        unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        # Empty third-level name -> fall back to the second-level unit
+        if not unit or not unit.strip():
+            return second_unit
+        # Raw names look like "a_b_c_d"; the 4th segment is the unit name
+        a = unit.split("_")
+        if len(a) == 1:
+            return unit
+        if len(a) < 4:
+            return second_unit
+        return a[3]  # 4th underscore-separated segment
+
+    # Populate the normalized third-level unit column
+    df['三级单位'] = df.apply(get_third_unit, axis=1)
+
+    # Map a raw first-level unit name to a second-level organization code
+    def get_area_no(unit):
+        # Empty/NaN input -> empty string
+        if pd.isna(unit) or not unit or not unit.strip():
+            return ""
+        # Mobile bureau and similar units share the fixed code "-11"
+        if any(keyword in unit for keyword in ["机动通信局", "机动局", "传输局", "线路维护中心"]):
+            return "-11"
+        # Provincial HQ / base-construction units share the fixed code "-12"
+        if any(keyword in unit for keyword in ["省公司本部", "雄安基地建设部", "华北基地建设部"]):
+            return "-12"
+        # Otherwise match the grade-1 organization whose name appears in the unit
+        for second_org in second_orgs:
+            if second_org.get('name') in unit:
+                return second_org.get('id')
+        # Default: HQ code
+        return "-12"
+
+    # Populate the second-level organization code column
+    df['二级组织机构编码'] = df['原始一级单位'].apply(get_area_no)
+
+    # Resolve an organization code to its display name via org_map
+    def get_org_name(org_no):
+        # Empty/NaN input -> empty string
+        if pd.isna(org_no) or not org_no or not org_no.strip():
+            return ""
+        # Look the code up and return the row's name when present
+        po = org_map.get(org_no)
+        if po is not None:
+            return po.get('name')
+        return ""
+
+    # Populate the second-level organization name column
+    df['二级组织机构名称'] = df['二级组织机构编码'].apply(get_org_name)
+
+    # Map a row to a third-level organization code
+    def get_city_no(x):
+        # Read the inputs; missing values become ""
+        area_no = str(x['二级组织机构编码']) if pd.notna(x['二级组织机构编码']) else ""
+        area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
+        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        # No parent organization resolved -> no city code
+        if not area_no or not area_name:
+            return ""
+        # Hard-coded district codes the generic name match below cannot resolve
+        if area_name == "石家庄":
+            if "井陉矿区" in unit:
+                return "D0130185"
+            if "井陉" in unit:
+                return "D0130121"
+        if area_name == "秦皇岛":
+            if "北戴河新区" in unit:
+                return "D0130325"
+            if "北戴河" in unit:
+                return "D0130304"
+        if area_name == "邯郸":
+            if "峰峰" in unit:
+                return "D0130406"
+        if area_name == "邢台":
+            if "内丘" in unit:
+                return "D0130523"
+            if "任泽" in unit:
+                return "D0130526"
+        # Mobile bureau branches use fixed HECS codes per city.
+        # NOTE(review): nothing in this file produces the name "省机动局" —
+        # confirm org_map contains it, otherwise this branch is dead.
+        if area_name == "省机动局":
+            if "沧州" in unit:
+                return "HECS180"
+            if "唐山" in unit:
+                return "HECS181"
+            if "秦皇岛" in unit:
+                return "HECS182"
+            if "廊坊" in unit:
+                return "HECS183"
+            if "张家口" in unit:
+                return "HECS184"
+            if "邢台" in unit:
+                return "HECS185"
+            if "邯郸" in unit:
+                return "HECS186"
+            if "保定" in unit:
+                return "HECS187"
+            if "石家庄" in unit:
+                return "HECS188"
+            if "承德" in unit:
+                return "HECS189"
+            if "衡水" in unit:
+                return "HECS720"
+            if "雄安" in unit:
+                return "HECS728"
+            return "HECS018"
+        if area_name == "雄安":
+            unit = unit.replace("雄安新区", "")
+        # Generic match: a child organization whose name appears in the unit
+        l3 = third_org_list_map.get(area_no, [])
+        for organization_po in l3:
+            if organization_po.get('name') in unit:
+                return organization_po.get('id')
+        # Per-city default codes when nothing matched
+        if area_name == "沧州":
+            return "D0130911"
+        if area_name == "唐山":
+            return "D0130202"
+        if area_name == "秦皇岛":
+            return "D0130302"
+        if area_name == "廊坊":
+            return "D0131000"
+        if area_name == "张家口":
+            return "D0130701"
+        if area_name == "邢台":
+            return "D0130502"
+        if area_name == "邯郸":
+            return "D0130402"
+        if area_name == "保定":
+            return "D0130601"
+        if area_name == "石家庄":
+            return "D0130186"
+        if area_name == "承德":
+            return "D0130801"
+        if area_name == "衡水":
+            return "D0133001"
+        if area_name == "雄安":
+            return "D0130830"
+        return "HE001"
+
+    # Populate the third-level organization code column
+    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)
+    # Resolve the code into its display name
+    df['三级组织机构名称'] = df['三级组织机构编码'].apply(get_org_name)
+
+    # Map a row to the alternative second-level organization code ("code 2")
+    def get_area_no2(x):
+        # Read the inputs; missing values become ""
+        area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
+        city_name = str(x['三级组织机构名称']) if pd.notna(x['三级组织机构名称']) else ""
+        # No second-level organization name -> no code
+        if not area_name or not area_name.strip():
+            return ""
+        # Mobile bureau rows: derive the code from the third-level (city) name
+        if area_name == "省机动局" and city_name and city_name.strip():
+            if "沧州" in city_name:
+                return "180"
+            if "唐山" in city_name:
+                return "181"
+            if "秦皇岛" in city_name:
+                return "182"
+            if "廊坊" in city_name:
+                return "183"
+            if "张家口" in city_name:
+                return "184"
+            if "邢台" in city_name:
+                return "185"
+            if "邯郸" in city_name:
+                return "186"
+            if "保定" in city_name:
+                return "187"
+            if "石家庄" in city_name:
+                return "188"
+            if "承德" in city_name:
+                return "189"
+            if "衡水" in city_name:
+                return "720"
+            if "雄安" in city_name:
+                return "782"
+        # Otherwise derive the code from the second-level name itself
+        if "沧州" in area_name:
+            return "180"
+        if "唐山" in area_name:
+            return "181"
+        if "秦皇岛" in area_name:
+            return "182"
+        if "廊坊" in area_name:
+            return "183"
+        if "张家口" in area_name:
+            return "184"
+        if "邢台" in area_name:
+            return "185"
+        if "邯郸" in area_name:
+            return "186"
+        if "保定" in area_name:
+            return "187"
+        if "石家庄" in area_name:
+            return "188"
+        if "承德" in area_name:
+            return "189"
+        if "衡水" in area_name:
+            return "720"
+        if "雄安" in area_name:
+            return "782"
+        return ""
+
+    # Populate the alternative second-level organization code column
+    df['二级组织机构编码2'] = df.apply(get_area_no2, axis=1)
+    # Resolve the code into its display name
+    df['二级组织机构名称2'] = df['二级组织机构编码2'].apply(get_org_name)
+
+    # Map a raw first-level unit name to a city area_id
+    def get_city_id(unit):
+        # Empty/NaN input -> empty string
+        if pd.isna(unit) or not unit or not unit.strip():
+            return ""
+        # Match the first city whose short name appears in the unit
+        for city in cities:
+            if city.get('short_name') and city['short_name'] in unit:
+                return city.get('area_id', "")
+        return ""
+
+    # Populate the city_id column
+    df['city_id'] = df['原始一级单位'].apply(get_city_id)
+
+    # Resolve an area_id to its display name via area_map
+    # (note: the parameter name shadows the builtin `id`)
+    def get_area_name(id):
+        # Empty/NaN input -> empty string
+        if pd.isna(id) or not id or not id.strip():
+            return ""
+        # Look the area up and return its name when present
+        area_po = area_map.get(id)
+        if area_po is not None:
+            return area_po.get("area_name", "")
+        return ""
+
+    # Populate the city name column
+    df['city'] = df['city_id'].apply(get_area_name)
+
+    # Map a row to a district area_id using the raw second-level unit name
+    def get_district_id(x):
+        # Read the inputs; missing values become ""
+        city_id = str(x['city_id']) if pd.notna(x['city_id']) else ""
+        city = str(x['city']) if pd.notna(x['city']) else ""
+        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        # Need a resolved city and a non-empty unit name
+        if not city_id or not city or not unit:
+            return ""
+        # Special cases the generic short-name match below cannot distinguish
+        if city == "石家庄":
+            if "井陉矿区" in unit:
+                return "130107"
+            if "井陉" in unit:
+                return "130121"
+        if city == "雄安":
+            unit = unit.replace("雄安新区", "")
+        # Generic match against the city's district list
+        districts = district_list_map.get(city_id)
+        if not districts:
+            return ""
+        for district in districts:
+            if district.get('short_name') in unit:
+                return district.get('area_id')
+        return ""
+
+    # Populate the district id/name columns
+    df['district_id'] = df.apply(get_district_id, axis=1)
+    df['district'] = df['district_id'].apply(get_area_name)
+
+    # Split the billing period (YYYYMM) into year and month parts
+    df['year_no'] = df['账期'].apply(lambda x: None if pd.isna(x) else str(x)[:4])
+    df['month_no'] = df['账期'].apply(lambda x: None if pd.isna(x) else str(x)[-2:])
+
+
+    # Parse a value into a pandas Timestamp; unparseable values become None
+    def to_datetime(x):
+        try:
+            return pd.to_datetime(x)
+        except Exception:
+            return None
+
+
+    df['登记日期'] = df['登记日期'].apply(to_datetime)
+    df['进厂时间'] = df['进厂时间'].apply(to_datetime)
+
+
+    # Parse a value as float; unparseable values become None
+    def get_num(x):
+        try:
+            return float(x)
+        except Exception:
+            return None
+
+    df['公里数'] = df['公里数'].apply(get_num)
+    df['截止数据提取日行驶里程'] = df['截止数据提取日行驶里程'].apply(get_num)
+    df['超出建议保养公里数'] = df['超出建议保养公里数'].apply(get_num)
+
+    # Parse a value as int; unparseable values become "".
+    # NOTE(review): "" here vs None in get_num — both serialize to an empty CSV
+    # field, but the in-memory column mixes int and str; confirm this is intended.
+    def get_int(x):
+        try:
+            return int(x)
+        except Exception:
+            return ""
+
+    df['超出建议保养时间(天)'] = df['超出建议保养时间(天)'].apply(get_int)
+    # Log a summary of the processed frame
+    print(df.info())
+
+    # Write the cleaned data as CSV for COPY import.
+    # NOTE(review): `header=[...]` renames columns positionally — it relies on the
+    # DataFrame column order matching this 30-name list exactly; verify on change.
+    df.to_csv(path_or_buf=output_path,
+              header=['year_month', 'che_pai_hao', 'che_xing', 'first_unit', 'second_unit', 'third_unit',
+                      'deng_ji_ri_qi', 'jin_chang_shi_jian', 'jin_chang_gong_li', 'li_cheng', 'bao_yang',
+                      'chao_bao_tian_shu', 'chao_bao_gong_li', 'raw_yi_ji', 'raw_er_ji', 'raw_san_ji',
+                      'raw_che_pai_hao', 'che_pai_fail', 'area_no', 'area_name', 'city_no', 'city_name', 'area_no2',
+                      'area_name2', 'city_id', 'city', 'district_id', 'district', 'year_no', 'month_no'],
+              index=False,
+              encoding='utf-8-sig')
+
+
+def data_import():
+    """Bulk-load output.csv into car.car_chao_bao via the copy.ps1 PowerShell script.
+
+    Raises:
+        RuntimeError: when the script's stdout does not report a COPY row count.
+    """
+    # Path of the PowerShell COPY helper script
+    script_path = r"../../copy.ps1"
+    # Target table and file information
+    table = "car.car_chao_bao"  # destination table
+    # Column order of the CSV, forwarded to COPY
+    columns = "year_month,che_pai_hao,che_xing,first_unit,second_unit,third_unit,deng_ji_ri_qi,jin_chang_shi_jian,jin_chang_gong_li,li_cheng,bao_yang,chao_bao_tian_shu,chao_bao_gong_li,raw_yi_ji,raw_er_ji,raw_san_ji,raw_che_pai_hao,che_pai_fail,area_no,area_name,city_no,city_name,area_no2,area_name2,city_id,city,district_id,district,year_no,month_no"
+    # Build the PowerShell command line.
+    # NOTE(review): the DB password is passed on the command line and logged below —
+    # visible in process lists and log files; consider a safer channel.
+    command = f"powershell -File {script_path} -db_host {db_host} -db_port {db_port} -db_username {db_username} -db_password {db_password} -dbname {dbname} -table {table} -filename {output_path} -columns {columns}"
+    # Log the command for debugging
+    logger.info("command: {}", command)
+    # Run the script, capturing stdout/stderr as text; do not raise on failure
+    completed_process = subprocess.run(
+        command,  # command to execute
+        check=False,  # do not raise on a non-zero exit code
+        text=True,  # treat output as str
+        capture_output=True,  # capture stdout and stderr
+    )
+    # Log return code, stdout and stderr of the import
+    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
+                completed_process.stderr)
+    # psql COPY prints "COPY <n>"; extract the imported row count from stdout
+    p = re.compile(r"^(COPY) (\d+)$")
+    count = None  # imported-row count; stays None on failure
+    matcher = p.match(completed_process.stdout)  # match the COPY result line
+    if matcher:
+        count = int(matcher.group(2))  # number of rows imported
+    # No row count found -> the import failed
+    if count is None:
+        raise RuntimeError("导入数据失败")
+
+
+def upload_file():
+    remote_path = f'{remote_dir_path}{year_month}.xlsx'  # 定义远程主机的目标文件路径
+    # 使用paramiko.SSHClient创建一个SSH客户端对象,并通过with语句管理其上下文
+    with paramiko.SSHClient() as ssh:
+        # 设置自动添加主机密钥策略,避免因未知主机密钥导致连接失败
+        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        # 连接到远程主机,传入主机地址、端口、用户名和密码
+        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
+        # 执行远程命令,创建远程目录(如果不存在)
+        ssh.exec_command(f'mkdir -p {remote_dir_path}')
+        # 打开SFTP会话,用于文件传输,并通过with语句管理其上下文
+        with ssh.open_sftp() as sftp:
+            # 记录日志,提示即将上传的本地文件和远程目标路径
+            logger.info("upload {} to {}", input_path, remote_path)
+            # 使用SFTP的put方法将本地文件上传到远程主机
+            sftp.put(input_path, remote_path)
+            # 记录日志,提示文件已成功上传
+            logger.info("uploaded {}", input_path)
+
+
+def data_update():
+    with psycopg.connect(
+            conninfo=conn_info,
+    ) as conn:
+        with conn.cursor() as curs:
+            # 插入过检
+            sql = f"""
+            insert
+                into
+                car_theme.wz_f_severely_over_maintained_leased_vehicles_details
+            (
+            statistical_month,
+                card_num,
+                car_brand,
+                enable_date,
+                arrival_time,
+                kilometers_entering_the_factory,
+                mileage_driven_as_of_data_extraction_date,
+                should_maintenance_be_carried_out,
+                exceeding_the_recommended_maintenance_time,
+                exceeding_the_recommended_maintenance_mileage,
+                city,
+                dpt_sec,
+                grid
+            )
+            select
+                year_month as statistical_month,
+                che_pai_hao as card_num,
+                che_xing as car_brand,
+                deng_ji_ri_qi as enable_date,
+                jin_chang_shi_jian as arrival_time,
+                jin_chang_gong_li as kilometers_entering_the_factory,
+                li_cheng as mileage_driven_as_of_data_extraction_date,
+                bao_yang as should_maintenance_be_carried_out,
+                chao_bao_tian_shu as exceeding_the_recommended_maintenance_time,
+                chao_bao_gong_li as exceeding_the_recommended_maintenance_mileage,
+                first_unit as city,
+                second_unit as dpt_sec,
+                third_unit as grid
+            from
+                car.car_chao_bao
+            where
+                year_month = {year_month}
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            logger.info(f"update {curs.rowcount}")
+
+
+data_process()
+data_import()
+upload_file()
+data_update()

+ 665 - 0
car/car-guo-jian/car_guo_jian.py

@@ -0,0 +1,665 @@
+"""车辆过检数据处理
+"""
+
+import re
+import subprocess
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from loguru import logger
+import pandas as pd
+import psycopg
+import paramiko
+
# Also write log output to the file a.log (in addition to stderr).
logger.add(sink='a.log')
# NOTE(review): SSH and database credentials are hard-coded below; consider
# moving them to environment variables or a config file.
ssh_hostname = '172.16.107.4'  # remote host address
ssh_port = 22  # SSH service port
ssh_username = 'app'  # SSH login user
ssh_password = '(l4w0ST_'  # SSH login password
# Remote directory that archives the monthly source files
remote_dir_path = '/data/history/car/guo-jian/'
# Database connection settings
db_host = "172.16.107.5"  # database host address
db_port = 5432         # database port
db_username = "finance"  # database user
db_password = "Finance@unicom23"  # database password
dbname = "financialdb"       # database name
conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
# Current date, and the first day of the previous month (the reporting month)
today = datetime.today()
start_date = today - relativedelta(months=1, day=1)
year_month = start_date.strftime('%Y%m')  # reporting month as YYYYMM, e.g. '202401'
# Input Excel file path
input_path = 'data.xlsx'
# Output CSV file path
output_path = 'output.csv'
+
+
def data_process():
    """Transform the raw annual-inspection Excel export into the staging CSV.

    Reads ``input_path``, normalises licence plates and the three levels of
    organisation-unit names, maps units to organisation/area codes loaded
    from the ``common`` schema, and writes the result to ``output_path``
    with the English column headers expected by ``car.car_guo_jian``.
    """
    # Matches any licence-plate province abbreviation (京, 津, 晋, ...).
    has_che_pai_province_pattern = re.compile(
        "[" + re.escape("京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁国防") + "]")

    # Matches characters that can never appear in a plate (everything except
    # province abbreviations, letters, digits and special plate suffixes).
    not_che_pai_pattern = re.compile(
        "[^京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁新港澳学挂领试超练警国防A-Z\\d]")

    # Matches a complete licence-plate number (new-energy and regular formats).
    che_pai_pattern = re.compile(
        r"([京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z]"
        r"(([DF]((?![IO])[A-Z0-9](?![IO]))\d{4})|(\d{5}[DF]))|"
        r"[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z][A-Z0-9]{4}[A-Z0-9挂学警港澳])"
    )

    # Prefecture-level city -> names of its county/district level units.
    er_ji_map = {
        "石家庄": ["鹿泉", "藁城", "栾城", "井陉矿区", "井陉", "无极", "正定", "元氏", "新乐", "晋州", "平山", "灵寿",
                   "赞皇", "赵县", "行唐", "高邑", "辛集", "深泽"],
        "唐山": ["唐山高开区", "迁西", "海港", "开平", "丰南", "滦县", "乐亭", "丰润", "玉田", "古冶", "曹妃甸", "遵化",
                 "滦南", "迁安"],
        "秦皇岛": ["北戴河新区", "北戴河", "山海关", "昌黎", "卢龙", "青龙", "抚宁"],
        "邯郸": ["曲周", "魏县", "馆陶", "磁县", "大名", "鸡泽", "成安", "涉县", "永年", "武安", "峰峰", "广平", "临漳",
                 "邱县", "肥乡"],
        "邢台": ["新河", "南宫", "隆尧", "内邱", "平乡", "宁晋", "广宗", "清河", "临西", "任县", "巨鹿", "沙河", "威县",
                 "临城", "柏乡", "南和"],
        "保定": ["涞水", "蠡县", "顺平", "博野", "安国", "涞源", "唐县", "定州", "高阳", "曲阳", "阜平", "清苑",
                 "高碑店",
                 "满城", "涿州", "易县", "望都", "徐水", "定兴", "白沟"],
        "张家口": ["张北", "崇礼", "康保", "赤城", "阳原", "万全", "下花园", "尚义", "怀安", "怀来", "蔚县", "涿鹿",
                   "沽源",
                   "宣化"],
        "承德": ["承德县", "兴隆", "宽城", "平泉", "营子", "隆化", "滦平", "围场", "丰宁", "双滦"],
        "廊坊": ["文安", "霸州", "大城", "廊坊开发区", "三河", "香河", "永清", "胜芳", "燕郊", "固安", "大厂"],
        "沧州": ["东光", "吴桥", "黄骅", "盐山", "孟村", "泊头", "献县", "南皮", "渤海新区", "海兴", "沧县", "河间",
                 "青县",
                 "任丘", "肃宁"],
        "衡水": ["景县", "阜城", "枣强", "深州", "饶阳", "故城", "武强", "武邑", "冀州", "安平"],
        "雄安": ["容城", "雄县", "安新"]
    }

    # Lookup tables populated from the database below.
    org_map = {}              # org id -> organisation row (grades 1 and 2)
    third_org_map = {}        # grade-2 org id -> organisation row
    third_org_list_map = {}   # grade-1 org id -> list of its grade-2 orgs
    area_map = {}             # area id -> area row (grades 1 and 2)
    district_list_map = {}    # city area_id -> list of its district rows

    # Load organisation and administrative-area reference data.
    with psycopg.connect(
            conninfo=conn_info,
            row_factory=psycopg.rows.dict_row
    ) as conn:
        with conn.cursor() as curs:
            # Grade-1 organisations, ordered by order_num.
            sql = """
                select * from common.organization where grade = 1 order by order_num
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            second_orgs = curs.fetchall()

            # Index grade-1 orgs and prepare an empty child list for each.
            for x in second_orgs:
                org_map[x['id']] = x
                third_org_list_map[x['id']] = []

            # Grade-2 organisations, ordered by parent then order_num.
            sql = """
                select * from common.organization where grade = 2 order by parent_id, order_num
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            third_orgs = curs.fetchall()

            # Index grade-2 orgs and attach each to its parent's child list.
            for x in third_orgs:
                org_map[x['id']] = x
                third_org_list_map[x['parent_id']].append(x)
                third_org_map[x['id']] = x

            # Grade-1 administrative areas (cities), ordered by area_id.
            sql = """
                select * from common.area where area_grade = 1 order by area_id
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            cities = curs.fetchall()

            # Index cities by area_id.
            for city in cities:
                area_map[city['area_id']] = city

            # Grade-2 administrative areas (districts/counties).
            sql = """
                select * from common.area where area_grade = 2 order by parent_id, area_id
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            districts = curs.fetchall()

            # Index districts by area_id.
            for district in districts:
                area_map[district['area_id']] = district

            # Map each city to the districts whose parent it is.
            for city in cities:
                district_list_map[city['area_id']] = []
                for district in districts:
                    if city['area_id'] == district['parent_id']:
                        district_list_map[city['area_id']].append(district)

    # Read the raw Excel export.
    df = pd.read_excel(io=input_path)
    # Columns to whitespace-clean; the two date columns are left untouched.
    columns_to_clean = list(filter(lambda x: x not in ('登记日期', '年检时间'), df.columns))
    # Remove all whitespace characters from string cells.
    df[columns_to_clean] = df[columns_to_clean].map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)
    df['账期'] = year_month
    # Preserve the original unit names and plate before normalisation.
    df['原始一级单位'] = df['一级单位']
    df['原始二级单位'] = df['二级单位']
    df['原始三级单位'] = df['三级单位']
    df['原始车牌号'] = df['车牌号']

    # Extract and normalise a licence plate from free-form text.
    def get_che_pai(che_pai):
        # Blank/NaN input -> empty string.
        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
            return ""
        # Plates are matched in upper case.
        upper_case = che_pai.upper()
        # Drop characters that cannot be part of a plate.
        s = not_che_pai_pattern.sub("", upper_case)
        # Try to find a well-formed plate in what remains.
        m = che_pai_pattern.search(s)
        if m:
            return m.group(0)

        # Contains a province abbreviation but no valid plate: warn and
        # return the cleaned-up string.
        if has_che_pai_province_pattern.search(che_pai):
            logger.warning(f"车牌匹配失败: {che_pai} -> {s}")
            return s

        # No match at all: warn and return the upper-cased original.
        logger.warning(f"车牌匹配失败: {che_pai} -> {upper_case}")
        return upper_case

    # Normalise the plate column.
    df['车牌号'] = df['车牌号'].apply(get_che_pai)
    # Drop duplicate plates, keeping the last occurrence.
    # NOTE(review): every row whose plate normalised to "" is treated as a
    # duplicate of the others, so only one such row survives — confirm that
    # is intended.
    df.drop_duplicates(subset=['车牌号'], keep='last', inplace=True)

    # Flag plates that could not be matched ("1" = failed, "0" = ok).
    def che_pai_fail(che_pai):
        # Blank/NaN input counts as a failure.
        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
            return "1"

        # Same cleaning steps as get_che_pai.
        s = not_che_pai_pattern.sub("", che_pai.upper())
        # A well-formed plate means success.
        m = che_pai_pattern.search(s)
        if m:
            return "0"  # matched

        return "1"  # no match

    # Mark match failures on the (already normalised) plate column.
    df['车牌匹配失败'] = df['车牌号'].apply(che_pai_fail)

    # Derive the normalised first-level unit from the raw unit name.
    def get_first_unit(unit):
        # Blank/NaN input -> empty string.
        if pd.isna(unit) or not unit or not unit.strip():
            return ""
        # Keyword-based special units.
        if "机动通信局" in unit or "机动局" in unit or "传输局" in unit or "线路维护中心" in unit:
            return "机动局"
        if "雄安基地建设部" in unit:
            return "雄安基地建设部"
        if "华北基地建设部" in unit:
            return "华北基地建设部"
        # Otherwise match one of the prefecture-level city names.
        for yj in er_ji_map.keys():
            if yj in unit:
                return yj
        return "省公司本部"  # fall back to provincial headquarters

    # Build the normalised first-level unit column.
    df['一级单位'] = df['原始一级单位'].apply(get_first_unit)

    # Derive the normalised second-level unit from the first-level unit and
    # the raw second-level unit name.
    def get_second_unit(x):
        first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
        # No second-level name: fall back to the first-level unit.
        if not unit or not unit.strip():
            return first_unit
        # Provincial HQ has no subdivisions.
        if first_unit == "省公司本部":
            return first_unit
        # Mobile/transport bureau: qualify by city where possible.
        if first_unit == "机动局":
            for yj in er_ji_map.keys():
                if yj in unit:
                    return f"机动局{yj}"
            return "机动局本部"
        # City-specific special cases.
        if first_unit == "石家庄":
            if "开发区" in unit:
                return "石家庄开发区"
        if first_unit == "廊坊":
            if "开发区" in unit:
                return "廊坊开发区"
        if first_unit == "邢台":
            if "内丘" in unit:
                return "内邱"
            if "任泽" in unit:
                return "任县"
        if first_unit == "唐山":
            if "高开区" in unit:
                return "唐山高开区"
            if "滦州" in unit:
                return "滦县"
        # Generic lookup in the city -> county map.
        ejs = er_ji_map.get(first_unit)
        if not ejs:
            return first_unit
        if first_unit == "雄安":
            unit = unit.replace("雄安新区", "")
        for ej in ejs:
            if ej in unit:
                return ej
        return f"{first_unit}本部"  # default: the city's own headquarters

    # Build the normalised second-level unit column.
    df['二级单位'] = df.apply(get_second_unit, axis=1)

    # Derive the third-level unit; names may be "_"-separated paths.
    def get_third_unit(x):
        second_unit = str(x['二级单位']) if pd.notna(x['二级单位']) else ""
        unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
        # No third-level name: fall back to the second-level unit.
        if not unit or not unit.strip():
            return second_unit
        # Split on underscores; plain names pass through unchanged.
        a = unit.split("_")
        if len(a) == 1:
            return unit
        if len(a) < 4:
            return second_unit
        return a[3]  # fourth path segment is the unit name

    # Build the normalised third-level unit column.
    df['三级单位'] = df.apply(get_third_unit, axis=1)

    # Map a raw first-level unit name to a grade-1 organisation code.
    def get_area_no(unit):
        # Blank/NaN input -> empty string.
        if pd.isna(unit) or not unit or not unit.strip():
            return ""
        # Fixed code "-11" for the mobile/transport bureau family.
        if any(keyword in unit for keyword in ["机动通信局", "机动局", "传输局", "线路维护中心"]):
            return "-11"
        # Fixed code "-12" for HQ and base-construction departments.
        if any(keyword in unit for keyword in ["省公司本部", "雄安基地建设部", "华北基地建设部"]):
            return "-12"
        # Otherwise match a grade-1 organisation by name.
        for second_org in second_orgs:
            if second_org.get('name') in unit:
                return second_org.get('id')
        # Default when nothing matches.
        return "-12"

    # Build the grade-1 organisation code column.
    df['二级组织机构编码'] = df['原始一级单位'].apply(get_area_no)

    # Resolve an organisation code to its name via org_map.
    def get_org_name(org_no):
        # Blank/NaN code -> empty string.
        if pd.isna(org_no) or not org_no or not org_no.strip():
            return ""
        # Look up the organisation row and return its name.
        po = org_map.get(org_no)
        if po is not None:
            return po.get('name')
        return ""

    # Build the grade-1 organisation name column.
    df['二级组织机构名称'] = df['二级组织机构编码'].apply(get_org_name)

    # Derive the grade-2 (city-level) organisation code for a row.
    def get_city_no(x):
        # Pull the needed fields, defaulting blanks to "".
        area_no = str(x['二级组织机构编码']) if pd.notna(x['二级组织机构编码']) else ""
        area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
        # Without a grade-1 code/name there is nothing to resolve.
        if not area_no or not area_name:
            return ""
        # Hard-coded special cases by city and keyword.
        if area_name == "石家庄":
            if "井陉矿区" in unit:
                return "D0130185"
            if "井陉" in unit:
                return "D0130121"
        if area_name == "秦皇岛":
            if "北戴河新区" in unit:
                return "D0130325"
            if "北戴河" in unit:
                return "D0130304"
        if area_name == "邯郸":
            if "峰峰" in unit:
                return "D0130406"
        if area_name == "邢台":
            if "内丘" in unit:
                return "D0130523"
            if "任泽" in unit:
                return "D0130526"
        # Provincial mobile/transport bureau: fixed HECS codes per city.
        if area_name == "省机动局":
            if "沧州" in unit:
                return "HECS180"
            if "唐山" in unit:
                return "HECS181"
            if "秦皇岛" in unit:
                return "HECS182"
            if "廊坊" in unit:
                return "HECS183"
            if "张家口" in unit:
                return "HECS184"
            if "邢台" in unit:
                return "HECS185"
            if "邯郸" in unit:
                return "HECS186"
            if "保定" in unit:
                return "HECS187"
            if "石家庄" in unit:
                return "HECS188"
            if "承德" in unit:
                return "HECS189"
            if "衡水" in unit:
                return "HECS720"
            if "雄安" in unit:
                return "HECS728"
            return "HECS018"
        if area_name == "雄安":
            unit = unit.replace("雄安新区", "")
        # Generic lookup among the grade-2 orgs of this grade-1 org.
        l3 = third_org_list_map.get(area_no, [])
        for organization_po in l3:
            if organization_po.get('name') in unit:
                return organization_po.get('id')
        # Per-city fallback codes when no grade-2 org matched.
        if area_name == "沧州":
            return "D0130911"
        if area_name == "唐山":
            return "D0130202"
        if area_name == "秦皇岛":
            return "D0130302"
        if area_name == "廊坊":
            return "D0131000"
        if area_name == "张家口":
            return "D0130701"
        if area_name == "邢台":
            return "D0130502"
        if area_name == "邯郸":
            return "D0130402"
        if area_name == "保定":
            return "D0130601"
        if area_name == "石家庄":
            return "D0130186"
        if area_name == "承德":
            return "D0130801"
        if area_name == "衡水":
            return "D0133001"
        if area_name == "雄安":
            return "D0130830"
        return "HE001"

    # Build the grade-2 organisation code column.
    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)
    # Build the grade-2 organisation name column.
    df['三级组织机构名称'] = df['三级组织机构编码'].apply(get_org_name)

    # Derive the alternate grade-1 code ("编码2") from the resolved names.
    def get_area_no2(x):
        # Pull the needed fields, defaulting blanks to "".
        area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
        city_name = str(x['三级组织机构名称']) if pd.notna(x['三级组织机构名称']) else ""
        # Without a grade-1 name there is nothing to resolve.
        if not area_name or not area_name.strip():
            return ""
        # For the provincial bureau, key off the grade-2 name instead.
        if area_name == "省机动局" and city_name and city_name.strip():
            if "沧州" in city_name:
                return "180"
            if "唐山" in city_name:
                return "181"
            if "秦皇岛" in city_name:
                return "182"
            if "廊坊" in city_name:
                return "183"
            if "张家口" in city_name:
                return "184"
            if "邢台" in city_name:
                return "185"
            if "邯郸" in city_name:
                return "186"
            if "保定" in city_name:
                return "187"
            if "石家庄" in city_name:
                return "188"
            if "承德" in city_name:
                return "189"
            if "衡水" in city_name:
                return "720"
            if "雄安" in city_name:
                return "782"
        # Otherwise key off the grade-1 name.
        if "沧州" in area_name:
            return "180"
        if "唐山" in area_name:
            return "181"
        if "秦皇岛" in area_name:
            return "182"
        if "廊坊" in area_name:
            return "183"
        if "张家口" in area_name:
            return "184"
        if "邢台" in area_name:
            return "185"
        if "邯郸" in area_name:
            return "186"
        if "保定" in area_name:
            return "187"
        if "石家庄" in area_name:
            return "188"
        if "承德" in area_name:
            return "189"
        if "衡水" in area_name:
            return "720"
        if "雄安" in area_name:
            return "782"
        return ""

    # Build the alternate grade-1 code column.
    df['二级组织机构编码2'] = df.apply(get_area_no2, axis=1)
    # Build the alternate grade-1 name column.
    df['二级组织机构名称2'] = df['二级组织机构编码2'].apply(get_org_name)

    # Map a raw first-level unit name to a city area id.
    def get_city_id(unit):
        # Blank/NaN input -> empty string.
        if pd.isna(unit) or not unit or not unit.strip():
            return ""
        # Match a city by its short name appearing in the unit text.
        for city in cities:
            if city.get('short_name') and city['short_name'] in unit:
                return city.get('area_id', "")
        return ""

    # Build the city id column.
    df['city_id'] = df['原始一级单位'].apply(get_city_id)

    # Resolve an area id to its name via area_map.
    # (Parameter name shadows the builtin id(); harmless within this scope.)
    def get_area_name(id):
        # Blank/NaN id -> empty string.
        if pd.isna(id) or not id or not id.strip():
            return ""
        # Look up the area row and return its name.
        area_po = area_map.get(id)
        if area_po is not None:
            return area_po.get("area_name", "")
        return ""

    # Build the city name column.
    df['city'] = df['city_id'].apply(get_area_name)

    # Derive the district area id for a row.
    def get_district_id(x):
        # Pull the needed fields, defaulting blanks to "".
        city_id = str(x['city_id']) if pd.notna(x['city_id']) else ""
        city = str(x['city']) if pd.notna(x['city']) else ""
        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
        # All three inputs are required.
        if not city_id or not city or not unit:
            return ""
        # City-specific special cases.
        if city == "石家庄":
            if "井陉矿区" in unit:
                return "130107"
            if "井陉" in unit:
                return "130121"
        if city == "雄安":
            unit = unit.replace("雄安新区", "")
        # Generic lookup among the city's districts.
        districts = district_list_map.get(city_id)
        if not districts:
            return ""
        for district in districts:
            if district.get('short_name') in unit:
                return district.get('area_id')
        return ""

    # Build the district id column.
    df['district_id'] = df.apply(get_district_id, axis=1)
    # Build the district name column.
    df['district'] = df['district_id'].apply(get_area_name)

    # Split the billing period (YYYYMM) into year and month parts, and
    # normalise the inspection date to YYYYMM.
    df['year_no'] = df['账期'].apply(lambda x: None if pd.isna(x) else str(x)[:4])
    df['month_no'] = df['账期'].apply(lambda x: None if pd.isna(x) else str(x)[-2:])
    df['年检时间'] = df['年检时间'].apply(lambda x: None if pd.isna(x) else pd.to_datetime(x).strftime('%Y%m'))

    # Dump frame structure for the log/console.
    print(df.info())

    # Write the staging CSV with the English headers expected by the
    # car.car_guo_jian COPY (order must match the column list in data_import).
    df.to_csv(path_or_buf=output_path,
              header=['year_month', 'che_pai_hao', 'che_xing', 'first_unit', 'second_unit', 'third_unit',
                      'deng_ji_ri_qi', 'nian_jian_shi_jian', 'shi_fou_guo_jian', 'shi_fou_ben_yue_ying_jian',
                      'ben_yue_shi_fou_nian_jian', 'raw_yi_ji', 'raw_er_ji', 'raw_san_ji', 'raw_che_pai_hao',
                      'che_pai_fail', 'area_no', 'area_name', 'city_no', 'city_name', 'area_no2', 'area_name2',
                      'city_id', 'city', 'district_id', 'district', 'year_no', 'month_no'],
              index=False,
              encoding='utf-8-sig')
+
+
def data_import():
    """Bulk-load ``output_path`` into ``car.car_guo_jian`` via copy.ps1.

    Invokes the PowerShell COPY helper and verifies that its output contains
    a ``COPY <n>`` row-count summary.

    Raises:
        RuntimeError: if no COPY row count can be found in the output.
    """
    # Path of the PowerShell helper script.
    script_path = r"../../copy.ps1"
    # Target table for the COPY.
    table = "car.car_guo_jian"
    # Column order of the CSV produced by data_process().
    columns = "year_month,che_pai_hao,che_xing,first_unit,second_unit,third_unit,deng_ji_ri_qi,nian_jian_shi_jian,shi_fou_guo_jian,shi_fou_ben_yue_ying_jian,ben_yue_shi_fou_nian_jian,raw_yi_ji,raw_er_ji,raw_san_ji,raw_che_pai_hao,che_pai_fail,area_no,area_name,city_no,city_name,area_no2,area_name2,city_id,city,district_id,district,year_no,month_no"
    # Build the command as an argument list: this avoids shell quoting
    # pitfalls and works on POSIX too (a bare command *string* without
    # shell=True is only accepted on Windows).
    command = [
        "powershell", "-File", script_path,
        "-db_host", db_host,
        "-db_port", str(db_port),
        "-db_username", db_username,
        "-db_password", db_password,
        "-dbname", dbname,
        "-table", table,
        "-filename", output_path,
        "-columns", columns,
    ]
    # Log the command for debugging/audit.
    logger.info("command: {}", command)
    completed_process = subprocess.run(
        command,  # command to execute
        check=False,  # do not raise on a non-zero exit code; we parse output
        text=True,  # decode stdout/stderr as str
        capture_output=True,  # capture stdout and stderr
    )
    # Log return code, stdout and stderr.
    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout, completed_process.stderr)
    # Search (not match) with MULTILINE so the "COPY <n>" summary is found
    # even when other lines precede it in the output.
    p = re.compile(r"^COPY (\d+)$", re.MULTILINE)
    matcher = p.search(completed_process.stdout)
    count = int(matcher.group(1)) if matcher else None
    # No row count means the COPY did not succeed.
    if count is None:
        raise RuntimeError("导入数据失败")
+
+
def upload_file():
    """Archive the source Excel file on the remote host over SFTP.

    Ensures the remote archive directory exists, then uploads
    ``input_path`` as ``<remote_dir_path><year_month>.xlsx``.
    """
    remote_path = f'{remote_dir_path}{year_month}.xlsx'  # remote target path
    # SSHClient as a context manager closes the transport on exit.
    with paramiko.SSHClient() as ssh:
        # Auto-accept unknown host keys so first connections do not fail.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        # Connect with host, port, user and password.
        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
        # Create the remote directory if needed.  exec_command() is
        # asynchronous, so wait for the command to finish before starting
        # the SFTP upload — otherwise put() can race the mkdir and fail.
        _, stdout, _ = ssh.exec_command(f'mkdir -p {remote_dir_path}')
        stdout.channel.recv_exit_status()
        # Open an SFTP session for the transfer.
        with ssh.open_sftp() as sftp:
            # Log source and destination before uploading.
            logger.info("upload {} to {}", input_path, remote_path)
            # Upload the local file to the remote path.
            sftp.put(input_path, remote_path)
            # Log successful completion.
            logger.info("uploaded {}", input_path)
+
+
def data_update():
    """Publish this month's inspection rows into the reporting table.

    Copies the rows for the current statistical month from the staging
    table ``car.car_guo_jian`` into
    ``car_theme.wz_f_un_annual_inspectionleased_vehicles_details``.
    The transaction commits when the connection context exits cleanly.
    """
    # A single connection/cursor pair is enough for the one INSERT.
    with psycopg.connect(conninfo=conn_info) as conn, conn.cursor() as cur:
        # year_month is generated internally (YYYYMM), so interpolating it
        # into the statement is safe here.
        statement = f"""
            insert
                into
                car_theme.wz_f_un_annual_inspectionleased_vehicles_details
            (
                statistical_month,
                card_num,
                car_brand,
                city,
                dpt_sec,
                grid,
                enable_date,
                passed_inspection,
                should_inspection_be_conducted_this_month,
                annual_inspection_this_month,
                inspect_annually_date
            )
            select
                year_month as statistical_month,
                che_pai_hao as card_num,
                che_xing as car_brand,
                first_unit as city,
                second_unit as dpt_sec,
                third_unit as grid,
                deng_ji_ri_qi as enable_date,
                shi_fou_guo_jian as passed_inspection,
                shi_fou_ben_yue_ying_jian as should_inspection_be_conducted_this_month,
                ben_yue_shi_fou_nian_jian as annual_inspection_this_month,
                nian_jian_shi_jian as inspect_annually_date
            from
                car.car_guo_jian
            where
                year_month = {year_month}
            """
        logger.info(f"sql: {statement}")
        cur.execute(statement)
        # Report how many rows the INSERT produced.
        logger.info(f"update {cur.rowcount}")
+
+
def main():
    """Run the monthly pipeline end to end: transform the Excel export,
    bulk-load it into PostgreSQL, archive the source file on the remote
    host, then publish the rows into the reporting table."""
    data_process()
    data_import()
    upload_file()
    data_update()


# Guard the entry point so that importing this module no longer triggers
# the whole pipeline as a side effect (the original called the four steps
# unconditionally at import time).
if __name__ == "__main__":
    main()

+ 79 - 0
car/car-li-cheng-day-missing/car_li_cheng_day_missing.py

@@ -0,0 +1,79 @@
+"""睿行车辆行驶里程日统计缺失数据日期
+"""
+
+import calendar  # 提供处理日历的功能,如获取某月的最后一天
+from datetime import datetime, timedelta  # 处理日期和时间
+from dateutil.relativedelta import relativedelta  # 提供相对时间计算功能
+from loguru import logger  # 日志记录工具
+import pandas as pd  # 数据分析和处理库
+import psycopg  # PostgreSQL数据库连接库
+
+
+# 定义一个函数,用于获取指定日期所在月份的最后一天
def get_last_day_of_month(t):
    """Return midnight on the last day of the month that contains *t*."""
    days_in_month = calendar.monthrange(t.year, t.month)[1]
    return datetime(t.year, t.month, days_in_month)
+
+
# Current date; the reporting window is the previous calendar month.
today = datetime.today()

# First day of the previous month.
start_date = today - relativedelta(months=1, day=1)

# Last day of the previous month.
end_date = get_last_day_of_month(start_date)

# Calendar dates that are missing from the daily mileage table.
missing_dates = []

# Query PostgreSQL for the missing dates.
with psycopg.connect(
        conninfo="host='172.16.107.5' port=5432 user='finance' password='Finance@unicom23' dbname='financialdb'",
        row_factory=psycopg.rows.dict_row  # rows come back as dicts
) as conn:
    with conn.cursor() as curs:
        # Dates are generated/compared server-side; the interpolated values
        # are internally formatted date strings, not user input.
        sql = f"""
        with
        t1 as (
            -- 生成从 start_date 到 end_date 的每一天日期序列
            select generate_series('{start_date.strftime('%Y-%m-%d')}'::date, '{end_date.strftime('%Y-%m-%d')}'::date, '1 day'::interval)::date as data_date
        ),
        t2 as (
            -- 从 car_li_cheng_day 表中提取指定月份的所有唯一日期
            select distinct data_date 
            from car.car_li_cheng_day 
            where year_month = {start_date.strftime('%Y%m')} 
            order by data_date
        )
        -- 查询在 t1 中存在但在 t2 中不存在的日期(即缺失的日期)
        select * 
        from t1 
        where not exists (select 1 from t2 where t2.data_date = t1.data_date)
        """
        # Log the statement for debugging.
        logger.info(f"sql: {sql}")

        # Run the query.
        curs.execute(sql)

        # fetchall() returns a list of dict rows.  (The original bound this
        # to the name `list`, shadowing the builtin.)
        rows = curs.fetchall()

        # Collect the missing dates.
        missing_dates = [row.get('data_date') for row in rows]

# One-column DataFrame of missing dates.
df = pd.DataFrame(missing_dates)

# Dump frame structure for the log/console.
print(df.info())

# Write the missing dates to an Excel file named after the month.
df.to_excel(
    f"{start_date.strftime('%Y%m')}睿行车辆行驶里程日统计缺失数据日期.xlsx",  # filename carries the month
    index=False,  # no index column
    header=False  # no header row
)

+ 752 - 0
car/car-wei-zhang/car_wei_zhang.py

@@ -0,0 +1,752 @@
+"""车辆违章数据处理
+"""
+
+import re
+import subprocess
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from loguru import logger
+import pandas as pd
+import psycopg
+import paramiko
+
+# Route log output to file a.log (in addition to stderr).
+logger.add(sink='a.log')
+ssh_hostname = '172.16.107.4'  # remote host for archive upload
+ssh_port = 22  # SSH port
+ssh_username = 'app'  # SSH login user
+ssh_password = '(l4w0ST_'  # SSH password; NOTE(review): secret hard-coded in source
+# Remote directory where the raw monthly workbook is archived.
+remote_dir_path = '/data/history/car/wei-zhang/'
+# Database connection settings.
+db_host = "172.16.107.5"  # database host
+db_port = 5432         # database port
+db_username = "finance"  # database user
+db_password = "Finance@unicom23"  # database password; NOTE(review): secret hard-coded in source
+dbname = "financialdb"       # database name
+conn_info= f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
+# Billing period: YYYYMM of the previous month (first day computed via relativedelta).
+today = datetime.today()
+start_date = today - relativedelta(months=1, day=1)
+year_month = start_date.strftime('%Y%m')
+# Input workbook (raw violation export).
+input_path = 'data.xlsx'
+# Output CSV consumed by the COPY import step.
+output_path = 'output.csv'
+
+
+def data_process():
+    """Clean last month's violation workbook (input_path) and write an import-ready CSV (output_path).
+
+    Normalizes licence plates, maps raw unit names onto the organization /
+    administrative-area reference tables loaded from PostgreSQL, and derives
+    period columns. Reads: input_path, common.organization, common.area.
+    Writes: output_path (utf-8-sig CSV with english column headers).
+    """
+    # Regex matching any licence-plate province abbreviation (京, 津, 晋, ...).
+    has_che_pai_province_pattern = re.compile(
+        "[" + re.escape("京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁国防") + "]")
+
+    # Regex matching characters that can never occur in a plate (used to strip noise).
+    not_che_pai_pattern = re.compile(
+        "[^京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁新港澳学挂领试超练警国防A-Z\\d]")
+
+    # Regex matching a complete plate number (new-energy and regular formats).
+    che_pai_pattern = re.compile(
+        r"([京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z]"
+        r"(([DF]((?![IO])[A-Z0-9](?![IO]))\d{4})|(\d{5}[DF]))|"
+        r"[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z][A-Z0-9]{4}[A-Z0-9挂学警港澳])"
+    )
+
+    # Prefecture-level cities mapped to their subordinate county-level units.
+    er_ji_map = {
+        "石家庄": ["鹿泉", "藁城", "栾城", "井陉矿区", "井陉", "无极", "正定", "元氏", "新乐", "晋州", "平山", "灵寿",
+                   "赞皇", "赵县", "行唐", "高邑", "辛集", "深泽"],
+        "唐山": ["唐山高开区", "迁西", "海港", "开平", "丰南", "滦县", "乐亭", "丰润", "玉田", "古冶", "曹妃甸", "遵化",
+                 "滦南", "迁安"],
+        "秦皇岛": ["北戴河新区", "北戴河", "山海关", "昌黎", "卢龙", "青龙", "抚宁"],
+        "邯郸": ["曲周", "魏县", "馆陶", "磁县", "大名", "鸡泽", "成安", "涉县", "永年", "武安", "峰峰", "广平", "临漳",
+                 "邱县", "肥乡"],
+        "邢台": ["新河", "南宫", "隆尧", "内邱", "平乡", "宁晋", "广宗", "清河", "临西", "任县", "巨鹿", "沙河", "威县",
+                 "临城", "柏乡", "南和"],
+        "保定": ["涞水", "蠡县", "顺平", "博野", "安国", "涞源", "唐县", "定州", "高阳", "曲阳", "阜平", "清苑",
+                 "高碑店",
+                 "满城", "涿州", "易县", "望都", "徐水", "定兴", "白沟"],
+        "张家口": ["张北", "崇礼", "康保", "赤城", "阳原", "万全", "下花园", "尚义", "怀安", "怀来", "蔚县", "涿鹿",
+                   "沽源",
+                   "宣化"],
+        "承德": ["承德县", "兴隆", "宽城", "平泉", "营子", "隆化", "滦平", "围场", "丰宁", "双滦"],
+        "廊坊": ["文安", "霸州", "大城", "廊坊开发区", "三河", "香河", "永清", "胜芳", "燕郊", "固安", "大厂"],
+        "沧州": ["东光", "吴桥", "黄骅", "盐山", "孟村", "泊头", "献县", "南皮", "渤海新区", "海兴", "沧县", "河间",
+                 "青县",
+                 "任丘", "肃宁"],
+        "衡水": ["景县", "阜城", "枣强", "深州", "饶阳", "故城", "武强", "武邑", "冀州", "安平"],
+        "雄安": ["容城", "雄县", "安新"]
+    }
+
+    # Lookup tables built from the reference queries below.
+    org_map = {}            # org id -> org row (both grades)
+    third_org_map = {}      # grade-2 org id -> org row
+    third_org_list_map = {} # grade-1 org id -> list of its grade-2 org rows
+    area_map = {}           # area_id -> area row (both grades)
+    district_list_map = {}  # city area_id -> list of its district rows
+
+    # Load the organization / area reference data from PostgreSQL.
+    with psycopg.connect(
+            conninfo=conn_info,
+            row_factory=psycopg.rows.dict_row
+    ) as conn:
+        with conn.cursor() as curs:
+            # Grade-1 organizations, ordered by order_num.
+            # NOTE(review): grade-1 rows are named "second_orgs" in this codebase — verify intent.
+            sql = """
+                select * from common.organization where grade = 1 order by order_num
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            second_orgs = curs.fetchall()
+
+            # Index grade-1 orgs and prepare their child lists.
+            for x in second_orgs:
+                org_map[x['id']] = x
+                third_org_list_map[x['id']] = []
+
+            # Grade-2 organizations, ordered by parent then order_num.
+            sql = """
+                select * from common.organization where grade = 2 order by parent_id, order_num
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            third_orgs = curs.fetchall()
+
+            # Index grade-2 orgs and attach them to their parents.
+            for x in third_orgs:
+                org_map[x['id']] = x
+                third_org_list_map[x['parent_id']].append(x)
+                third_org_map[x['id']] = x
+
+            # Grade-1 administrative areas (cities), ordered by area_id.
+            sql = """
+                select * from common.area where area_grade = 1 order by area_id
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            cities = curs.fetchall()
+
+            # Index cities by area_id.
+            for city in cities:
+                area_map[city['area_id']] = city
+
+            # Grade-2 administrative areas (districts), ordered by parent then area_id.
+            sql = """
+                select * from common.area where area_grade = 2 order by parent_id, area_id
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            districts = curs.fetchall()
+
+            # Index districts by area_id.
+            for district in districts:
+                area_map[district['area_id']] = district
+
+            # Attach districts to their parent city.
+            # NOTE(review): O(cities x districts) nested scan; fine for reference-table sizes.
+            for city in cities:
+                district_list_map[city['area_id']] = []
+                for district in districts:
+                    if city['area_id'] == district['parent_id']:
+                        district_list_map[city['area_id']].append(district)
+
+    # Read the raw violation workbook.
+    df = pd.read_excel(io=input_path)
+    # Columns to whitespace-clean: everything except the two timestamp columns.
+    columns_to_clean = list(filter(lambda x: x not in ('违章时间', '处理时间'), df.columns))
+    # Strip all whitespace from string cells.
+    # NOTE(review): DataFrame.map requires pandas >= 2.1 (was .applymap before) — confirm runtime version.
+    df[columns_to_clean] = df[columns_to_clean].map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)
+    df['账期'] = year_month
+    # Preserve the raw unit and plate values before normalization.
+    df['原始一级单位'] = df['一级单位']
+    df['原始二级单位'] = df['二级单位']
+    df['原始三级单位'] = df['三级单位']
+    df['原始车牌号'] = df['车牌号']
+
+    # Extract and normalize a plate number from a raw cell value.
+    def get_che_pai(che_pai):
+        # Blank/NaN input -> empty string.
+        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
+            return ""
+        # Uppercase first so the character classes match.
+        upper_case = che_pai.upper()
+        # Drop characters that cannot belong to a plate.
+        s = not_che_pai_pattern.sub("", upper_case)
+        # Try to find a well-formed plate in what remains.
+        m = che_pai_pattern.search(s)
+        if m:
+            return m.group(0)
+
+        # Contains a province character but no full match: log and return the stripped text.
+        if has_che_pai_province_pattern.search(che_pai):
+            logger.warning(f"车牌匹配失败: {che_pai} -> {s}")
+            return s
+
+        # No match at all: log and return the uppercased original.
+        logger.warning(f"车牌匹配失败: {che_pai} -> {upper_case}")
+        return upper_case
+
+    # Normalize the plate column.
+    df['车牌号'] = df['车牌号'].apply(get_che_pai)
+
+    # Flag rows whose plate failed to match ("1" = failed, "0" = ok).
+    def che_pai_fail(che_pai):
+        # Blank/NaN plate counts as a failure.
+        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
+            return "1"
+
+        # Same strip-then-match procedure as get_che_pai.
+        s = not_che_pai_pattern.sub("", che_pai.upper())
+        # Look for a well-formed plate.
+        m = che_pai_pattern.search(s)
+        if m:
+            return "0"  # matched
+
+        return "1"  # failed
+
+    # Compute the failure flag from the already-normalized plate column.
+    df['车牌匹配失败'] = df['车牌号'].apply(che_pai_fail)
+
+    # Derive the normalized first-level unit from the raw value.
+    def get_first_unit(unit):
+        # Blank/NaN input -> empty string.
+        if pd.isna(unit) or not unit or not unit.strip():
+            return ""
+        # Keyword buckets for special organizations.
+        if "机动通信局" in unit or "机动局" in unit or "传输局" in unit or "线路维护中心" in unit:
+            return "机动局"
+        if "雄安基地建设部" in unit:
+            return "雄安基地建设部"
+        if "华北基地建设部" in unit:
+            return "华北基地建设部"
+        # Otherwise match against the known prefecture names.
+        for yj in er_ji_map.keys():
+            if yj in unit:
+                return yj
+        return "省公司本部"  # default: provincial headquarters
+
+    # Apply to the raw first-level unit.
+    df['一级单位'] = df['原始一级单位'].apply(get_first_unit)
+
+    # Derive the normalized second-level unit from a row.
+    def get_second_unit(x):
+        # Normalized first-level unit and raw second-level unit (NaN -> "").
+        first_unit = str(x['一级单位']) if pd.notna(x['一级单位']) else ""
+        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        # Blank second-level unit: fall back to the first-level unit.
+        if not unit or not unit.strip():
+            return first_unit
+        # Provincial HQ has no subdivisions.
+        if first_unit == "省公司本部":
+            return first_unit
+        # 机动局 gets a per-prefecture suffix, else its own HQ.
+        if first_unit == "机动局":
+            for yj in er_ji_map.keys():
+                if yj in unit:
+                    return f"机动局{yj}"
+            return "机动局本部"
+        # City-specific keyword overrides.
+        if first_unit == "石家庄":
+            if "开发区" in unit:
+                return "石家庄开发区"
+        if first_unit == "廊坊":
+            if "开发区" in unit:
+                return "廊坊开发区"
+        if first_unit == "邢台":
+            if "内丘" in unit:
+                return "内邱"
+            if "任泽" in unit:
+                return "任县"
+        if first_unit == "唐山":
+            if "高开区" in unit:
+                return "唐山高开区"
+            if "滦州" in unit:
+                return "滦县"
+        # Match against the county list for this prefecture.
+        ejs = er_ji_map.get(first_unit)
+        if not ejs:
+            return first_unit
+        if first_unit == "雄安":
+            unit = unit.replace("雄安新区", "")
+        for ej in ejs:
+            if ej in unit:
+                return ej
+        return f"{first_unit}本部"  # default: prefecture HQ
+
+    # Row-wise apply for the second-level unit.
+    df['二级单位'] = df.apply(get_second_unit, axis=1)
+
+    # Derive the normalized third-level unit from a row.
+    def get_third_unit(x):
+        # Normalized second-level unit and raw third-level unit (NaN -> "").
+        second_unit = str(x['二级单位']) if pd.notna(x['二级单位']) else ""
+        unit = str(x['原始三级单位']) if pd.notna(x['原始三级单位']) else ""
+        # Blank third-level unit: fall back to the second-level unit.
+        if not unit or not unit.strip():
+            return second_unit
+        # Underscore-delimited path: segment 4 (index 3) is the unit name.
+        a = unit.split("_")
+        if len(a) == 1:
+            return unit
+        if len(a) < 4:
+            return second_unit
+        return a[3]  # fourth path segment
+
+    # Row-wise apply for the third-level unit.
+    df['三级单位'] = df.apply(get_third_unit, axis=1)
+
+    # Map a raw first-level unit name to a second-level organization code.
+    def get_area_no(unit):
+        # Blank/NaN input -> empty string.
+        if pd.isna(unit) or not unit or not unit.strip():
+            return ""
+        # Fixed code "-11" for the mobile/transmission organizations.
+        if any(keyword in unit for keyword in ["机动通信局", "机动局", "传输局", "线路维护中心"]):
+            return "-11"
+        # Fixed code "-12" for HQ-like organizations.
+        if any(keyword in unit for keyword in ["省公司本部", "雄安基地建设部", "华北基地建设部"]):
+            return "-12"
+        # Otherwise match against the grade-1 org names loaded earlier (closure).
+        for second_org in second_orgs:
+            if second_org.get('name') in unit:
+                return second_org.get('id')
+        # Default to "-12" when nothing matched.
+        return "-12"
+
+    # Compute the second-level org code from the raw first-level unit.
+    df['二级组织机构编码'] = df['原始一级单位'].apply(get_area_no)
+
+    # Resolve an organization code to its name via org_map.
+    def get_org_name(org_no):
+        # Blank/NaN code -> empty string.
+        if pd.isna(org_no) or not org_no or not org_no.strip():
+            return ""
+        # Look up the org row and return its name.
+        po = org_map.get(org_no)
+        if po is not None:
+            return po.get('name')
+        return ""
+
+    # Resolve the second-level org name.
+    df['二级组织机构名称'] = df['二级组织机构编码'].apply(get_org_name)
+
+    # Map a row to a third-level organization code.
+    def get_city_no(x):
+        # Inputs (NaN -> "").
+        area_no = str(x['二级组织机构编码']) if pd.notna(x['二级组织机构编码']) else ""
+        area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
+        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        # Need both the parent code and name to proceed.
+        if not area_no or not area_name:
+            return ""
+        # Hard-coded overrides per parent organization.
+        # NOTE(review): these codes come from the production org table; not verifiable from this file.
+        if area_name == "石家庄":
+            if "井陉矿区" in unit:
+                return "D0130185"
+            if "井陉" in unit:
+                return "D0130121"
+        if area_name == "秦皇岛":
+            if "北戴河新区" in unit:
+                return "D0130325"
+            if "北戴河" in unit:
+                return "D0130304"
+        if area_name == "邯郸":
+            if "峰峰" in unit:
+                return "D0130406"
+        if area_name == "邢台":
+            if "内丘" in unit:
+                return "D0130523"
+            if "任泽" in unit:
+                return "D0130526"
+        if area_name == "省机动局":
+            if "沧州" in unit:
+                return "HECS180"
+            if "唐山" in unit:
+                return "HECS181"
+            if "秦皇岛" in unit:
+                return "HECS182"
+            if "廊坊" in unit:
+                return "HECS183"
+            if "张家口" in unit:
+                return "HECS184"
+            if "邢台" in unit:
+                return "HECS185"
+            if "邯郸" in unit:
+                return "HECS186"
+            if "保定" in unit:
+                return "HECS187"
+            if "石家庄" in unit:
+                return "HECS188"
+            if "承德" in unit:
+                return "HECS189"
+            if "衡水" in unit:
+                return "HECS720"
+            if "雄安" in unit:
+                return "HECS728"
+            return "HECS018"
+        if area_name == "雄安":
+            unit = unit.replace("雄安新区", "")
+        # Generic case: match against the grade-2 orgs under this parent.
+        l3 = third_org_list_map.get(area_no, [])
+        for organization_po in l3:
+            if organization_po.get('name') in unit:
+                return organization_po.get('id')
+        # Per-city fallback codes when no child org matched.
+        if area_name == "沧州":
+            return "D0130911"
+        if area_name == "唐山":
+            return "D0130202"
+        if area_name == "秦皇岛":
+            return "D0130302"
+        if area_name == "廊坊":
+            return "D0131000"
+        if area_name == "张家口":
+            return "D0130701"
+        if area_name == "邢台":
+            return "D0130502"
+        if area_name == "邯郸":
+            return "D0130402"
+        if area_name == "保定":
+            return "D0130601"
+        if area_name == "石家庄":
+            return "D0130186"
+        if area_name == "承德":
+            return "D0130801"
+        if area_name == "衡水":
+            return "D0133001"
+        if area_name == "雄安":
+            return "D0130830"
+        return "HE001"
+
+    # Row-wise apply for the third-level org code.
+    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)
+    # Resolve the third-level org name.
+    df['三级组织机构名称'] = df['三级组织机构编码'].apply(get_org_name)
+
+    # Map a row to the alternate second-level org code ("area no 2").
+    def get_area_no2(x):
+        # Inputs (NaN -> "").
+        area_name = str(x['二级组织机构名称']) if pd.notna(x['二级组织机构名称']) else ""
+        city_name = str(x['三级组织机构名称']) if pd.notna(x['三级组织机构名称']) else ""
+        # Blank second-level name -> "".
+        if not area_name or not area_name.strip():
+            return ""
+        # For the provincial mobile bureau, derive the code from the third-level name.
+        if area_name == "省机动局" and city_name and city_name.strip():
+            if "沧州" in city_name:
+                return "180"
+            if "唐山" in city_name:
+                return "181"
+            if "秦皇岛" in city_name:
+                return "182"
+            if "廊坊" in city_name:
+                return "183"
+            if "张家口" in city_name:
+                return "184"
+            if "邢台" in city_name:
+                return "185"
+            if "邯郸" in city_name:
+                return "186"
+            if "保定" in city_name:
+                return "187"
+            if "石家庄" in city_name:
+                return "188"
+            if "承德" in city_name:
+                return "189"
+            if "衡水" in city_name:
+                return "720"
+            if "雄安" in city_name:
+                return "782"
+        # Otherwise derive it from the second-level name.
+        if "沧州" in area_name:
+            return "180"
+        if "唐山" in area_name:
+            return "181"
+        if "秦皇岛" in area_name:
+            return "182"
+        if "廊坊" in area_name:
+            return "183"
+        if "张家口" in area_name:
+            return "184"
+        if "邢台" in area_name:
+            return "185"
+        if "邯郸" in area_name:
+            return "186"
+        if "保定" in area_name:
+            return "187"
+        if "石家庄" in area_name:
+            return "188"
+        if "承德" in area_name:
+            return "189"
+        if "衡水" in area_name:
+            return "720"
+        if "雄安" in area_name:
+            return "782"
+        return ""
+
+    # Row-wise apply for the alternate second-level org code.
+    df['二级组织机构编码2'] = df.apply(get_area_no2, axis=1)
+    # Resolve the alternate second-level org name.
+    df['二级组织机构名称2'] = df['二级组织机构编码2'].apply(get_org_name)
+
+    # Map a raw first-level unit to an administrative-area city id.
+    def get_city_id(unit):
+        # Blank/NaN input -> "".
+        if pd.isna(unit) or not unit or not unit.strip():
+            return ""
+        # Match the city short name inside the unit string.
+        for city in cities:
+            if city.get('short_name') and city['short_name'] in unit:
+                return city.get('area_id', "")
+        return ""
+
+    # Compute city_id from the raw first-level unit.
+    df['city_id'] = df['原始一级单位'].apply(get_city_id)
+
+    # Resolve an area id to its name via area_map.
+    def get_area_name(id):
+        # Blank/NaN id -> "".  NOTE(review): parameter name shadows the builtin `id`.
+        if pd.isna(id) or not id or not id.strip():
+            return ""
+        # Look up the area row and return its name.
+        area_po = area_map.get(id)
+        if area_po is not None:
+            return area_po.get("area_name", "")
+        return ""
+
+    # Resolve the city name.
+    df['city'] = df['city_id'].apply(get_area_name)
+
+    # Map a row to a district (county-level area) id.
+    def get_district_id(x):
+        # Inputs (NaN -> "").
+        city_id = str(x['city_id']) if pd.notna(x['city_id']) else ""
+        city = str(x['city']) if pd.notna(x['city']) else ""
+        unit = str(x['原始二级单位']) if pd.notna(x['原始二级单位']) else ""
+        # Need all three to resolve a district.
+        if not city_id or not city or not unit:
+            return ""
+        # Shijiazhuang: disambiguate 井陉矿区 vs 井陉 before the generic scan.
+        if city == "石家庄":
+            if "井陉矿区" in unit:
+                return "130107"
+            if "井陉" in unit:
+                return "130121"
+        if city == "雄安":
+            unit = unit.replace("雄安新区", "")
+        districts = district_list_map.get(city_id)
+        if not districts:
+            return ""
+        for district in districts:
+            if district.get('short_name') in unit:
+                return district.get('area_id')
+        return ""
+
+    # Row-wise apply for the district id.
+    df['district_id'] = df.apply(get_district_id, axis=1)
+    # Resolve the district name.
+    df['district'] = df['district_id'].apply(get_area_name)
+
+    # Split the billing period into year and month parts.
+    df['year_no'] = df['账期'].apply(lambda x: None if pd.isna(x) else str(x)[:4])
+    df['month_no'] = df['账期'].apply(lambda x: None if pd.isna(x) else str(x)[-2:])
+    # YYYYMM of the violation and of its processing.
+    df['违章年月'] = df['违章时间'].apply(lambda x: None if pd.isna(x) else pd.to_datetime(x).strftime('%Y%m'))
+    df['处理年月'] = df['处理时间'].apply(lambda x: None if pd.isna(x) else pd.to_datetime(x).strftime('%Y%m'))
+
+    # Dump the DataFrame structure for debugging.
+    print(df.info())
+
+    # Write the CSV with english headers in the order the COPY import expects.
+    # NOTE(review): the header list must match the DataFrame's column order exactly — verify against input layout.
+    df.to_csv(path_or_buf=output_path,
+              header=['year_month', 'che_pai_hao', 'first_unit', 'second_unit', 'third_unit', 'che_jia_hao',
+                      'wei_zhang_shi_jian', 'wei_zhang_di_dian', 'wei_zhang_xiang_qing', 'kou_fen', 'fa_kuan',
+                      'chu_li_zhuang_tai', 'chu_li_shi_jian', 'wei_zhang_wei_chu_li_shi_chang', 'raw_yi_ji',
+                      'raw_er_ji',
+                      'raw_san_ji', 'raw_che_pai_hao', 'che_pai_fail', 'area_no', 'area_name', 'city_no', 'city_name',
+                      'area_no2', 'area_name2', 'city_id', 'city', 'district_id', 'district', 'year_no', 'month_no',
+                      'wei_zhang_nian_yue', 'chu_li_nian_yue'],
+              index=False,
+              encoding='utf-8-sig')
+
+
+def data_import():
+    """Bulk-load output_path into car.car_wei_zhang via the copy.ps1 PowerShell wrapper.
+
+    Raises RuntimeError when the wrapper's stdout does not report a COPY row count.
+    """
+    # Path to the PowerShell COPY helper script.
+    script_path = r"../../copy.ps1"
+    # Target table.
+    table = "car.car_wei_zhang"  # destination table
+    # Column order of the CSV, passed through to COPY.
+    columns = "year_month,che_pai_hao,first_unit,second_unit,third_unit,che_jia_hao,wei_zhang_shi_jian,wei_zhang_di_dian,wei_zhang_xiang_qing,kou_fen,fa_kuan,chu_li_zhuang_tai,chu_li_shi_jian,wei_zhang_wei_chu_li_shi_chang,raw_yi_ji,raw_er_ji,raw_san_ji,raw_che_pai_hao,che_pai_fail,area_no,area_name,city_no,city_name,area_no2,area_name2,city_id,city,district_id,district,year_no,month_no,wei_zhang_nian_yue,chu_li_nian_yue"
+    # Build the PowerShell command line.
+    # NOTE(review): the DB password is passed on the command line (visible in process lists);
+    # the command is a single shell string — prefer an argument list with shell=False.
+    command = f"powershell -File {script_path} -db_host {db_host} -db_port {db_port} -db_username {db_username} -db_password {db_password} -dbname {dbname} -table {table} -filename {output_path} -columns {columns}"
+    # Log the command for debugging.
+    logger.info("command: {}", command)
+    # Run the command, capturing stdout/stderr as text.
+    completed_process = subprocess.run(
+        command,             # command line to execute
+        check=False,          # do not raise on a non-zero exit code; handled below
+        text=True,           # decode output as str
+        capture_output=True,  # capture stdout and stderr
+    )
+    # Log return code, stdout and stderr.
+    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout, completed_process.stderr)
+    # Expect stdout to begin with "COPY <n>" reporting the imported row count.
+    # NOTE(review): re.match anchors at the start of stdout — any preceding output makes this fail.
+    p = re.compile(r"^(COPY) (\d+)$")
+    count = None  # imported row count, if detected
+    matcher = p.match(completed_process.stdout)  # match against the start of stdout
+    if matcher:
+        count = int(matcher.group(2))  # extract the row count
+
+    # No COPY count found: treat the import as failed.
+    if count is None:
+        raise RuntimeError("导入数据失败")
+
+
+def upload_file():
+    """Archive the raw workbook (input_path) to the remote host as <remote_dir_path><year_month>.xlsx via SFTP."""
+    remote_path = f'{remote_dir_path}{year_month}.xlsx'  # destination path on the remote host
+    # SSH client managed by a with-block so the connection is always closed.
+    with paramiko.SSHClient() as ssh:
+        # Auto-accept unknown host keys so first connections do not fail.
+        # NOTE(review): AutoAddPolicy skips host-key verification (MITM risk on untrusted networks).
+        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        # Connect with the module-level host/port/credentials.
+        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
+        # Ensure the remote directory exists.
+        # NOTE(review): exec_command returns without waiting; mkdir may still be running when
+        # the SFTP put starts — confirm this race is acceptable.
+        ssh.exec_command(f'mkdir -p {remote_dir_path}')
+        # Open an SFTP session for the transfer.
+        with ssh.open_sftp() as sftp:
+            # Log what is about to be uploaded and where.
+            logger.info("upload {} to {}", input_path, remote_path)
+            # Upload the local file.
+            sftp.put(input_path, remote_path)
+            # Log completion.
+            logger.info("uploaded {}", input_path)
+
+
+def data_update():
+    """Propagate this month's rows from car.car_wei_zhang into the downstream tables.
+
+    Inserts long-unprocessed violations into car.car_wei_zhang_chang_qi and all of the
+    month's violations into car_theme.wz_f_violation_details. The psycopg connection
+    context commits on successful exit.
+    """
+    with psycopg.connect(
+            conninfo=conn_info,
+    ) as conn:
+        with conn.cursor() as curs:
+            # Insert violations unprocessed for more than 150 (units per table definition)
+            # into the long-outstanding table.
+            # NOTE(review): year_month is interpolated unquoted (numeric comparison) via f-string;
+            # it is an internally generated YYYYMM, but a bound parameter would be safer.
+            sql = f"""
+                insert
+                    into
+                    car.car_wei_zhang_chang_qi
+                (
+                    year_month,
+                    che_pai_hao,
+                    raw_yi_ji,
+                    raw_er_ji,
+                    raw_san_ji,
+                    wei_zhang_shi_jian,
+                    wei_zhang_di_dian,
+                    wei_zhang_xiang_qing,
+                    kou_fen,
+                    fa_kuan,
+                    wei_zhang_wei_chu_li_shi_chang,
+                    chu_li_zhuang_tai,
+                    first_unit,
+                    second_unit,
+                    third_unit,
+                    area_no,
+                    area_name,
+                    city_no,
+                    city_name,
+                    area_name2,
+                    area_no2,
+                    city_id,
+                    city,
+                    district_id,
+                    district,
+                    raw_che_pai_hao,
+                    che_pai_fail,
+                    wei_zhang_nian_yue,
+                    year_no,
+                    month_no,
+                    source
+                )
+                select
+                    year_month,
+                    che_pai_hao,
+                    raw_yi_ji,
+                    raw_er_ji,
+                    raw_san_ji,
+                    wei_zhang_shi_jian,
+                    wei_zhang_di_dian,
+                    wei_zhang_xiang_qing,
+                    kou_fen,
+                    fa_kuan,
+                    wei_zhang_wei_chu_li_shi_chang,
+                    chu_li_zhuang_tai,
+                    first_unit,
+                    second_unit,
+                    third_unit,
+                    area_no,
+                    area_name,
+                    city_no,
+                    city_name,
+                    area_name2,
+                    area_no2,
+                    city_id,
+                    city,
+                    district_id,
+                    district,
+                    raw_che_pai_hao,
+                    che_pai_fail,
+                    wei_zhang_nian_yue,
+                    year_no,
+                    month_no,
+                    source
+                from
+                    car.car_wei_zhang
+                where
+                    chu_li_zhuang_tai = '未处理'
+                    and wei_zhang_wei_chu_li_shi_chang > 150
+                    and year_month = {year_month}
+                            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            logger.info(f"update {curs.rowcount}")
+            # Insert all of this month's violations into the theme-layer details table.
+            sql = f"""
+            insert
+                into
+                car_theme.wz_f_violation_details
+            (
+                statistical_month,
+                card_num,
+                city,
+                dpt_sec,
+                grid,
+                violation_time,
+                violation_location,
+                violation_details,
+                deduction_points,
+                fine,
+                processing_time,
+                unprocessed_duration_of_violation,
+                offline_actual_processing_status
+            )
+            select
+                year_month,
+                che_pai_hao,
+                first_unit,
+                second_unit,
+                third_unit,
+                wei_zhang_shi_jian,
+                wei_zhang_di_dian,
+                wei_zhang_xiang_qing,
+                kou_fen,
+                fa_kuan,
+                chu_li_shi_jian,
+                wei_zhang_wei_chu_li_shi_chang,
+                chu_li_zhuang_tai
+            from
+                car.car_wei_zhang
+            where
+                year_month = {year_month}
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            logger.info(f"update {curs.rowcount}")
+
+
+data_process()
+data_import()
+upload_file()
+data_update()

+ 72 - 0
car/car-yue-jie-missing/car_yue_jie_missing.py

@@ -0,0 +1,72 @@
+"""睿行车辆越界报警日统计缺失数据日期
+"""
+
+import calendar  # 导入日历模块,用于处理日期相关操作
+from datetime import datetime  # 导入日期时间模块,用于处理日期和时间
+import pandas as pd  # 导入pandas库,用于数据处理与分析
+import psycopg  # 导入psycopg库,用于连接PostgreSQL数据库
+from dateutil.relativedelta import relativedelta  # 导入relativedelta模块,用于日期的相对计算
+from loguru import logger  # 导入loguru库,用于日志记录
+
+
+# Return a datetime for the last day of the month containing date/datetime t.
+def get_last_day_of_month(t):
+    _, last_day = calendar.monthrange(t.year, t.month)  # monthrange -> (weekday of day 1, number of days in month)
+    return datetime(t.year, t.month, last_day)  # midnight on the month's final day
+
+
+# Current date.
+today = datetime.today()
+# First day of the previous month (relativedelta: step back one month, clamp day to 1).
+start_date = today - relativedelta(months=1, day=1)
+# Last day of the previous month.
+end_date = get_last_day_of_month(start_date)
+
+# Accumulator for the missing dates found by the query.
+a = []
+
+# Connect to PostgreSQL with psycopg (v3).
+# NOTE(review): credentials are hard-coded in source; consider env vars or a config file.
+with psycopg.connect(
+        conninfo="host='172.16.107.5' port=5432 user='finance' password='Finance@unicom23' dbname='financialdb'",
+        row_factory=psycopg.rows.dict_row  # rows are returned as dicts
+) as conn:
+    with conn.cursor() as curs:  # open a cursor for this query
+        # Build the SQL statement.
+        # NOTE(review): values are interpolated via f-string; they are internally
+        # generated dates here, but parameterized queries would be safer.
+        sql = f"""
+        with
+        t1 as (
+            -- 生成从start_date到end_date之间的所有日期序列
+            select generate_series('{start_date.strftime('%Y-%m-%d')}'::date, '{end_date.strftime('%Y-%m-%d')}'::date, '1 day'::interval)::date as data_date
+        ),
+        t2 as (
+            -- 从car.car_yue_jie表中提取指定年月的所有唯一日期
+            select distinct data_date 
+            from car.car_yue_jie 
+            where year_month = {start_date.strftime('%Y%m')} 
+            order by data_date
+        )
+        -- 查询在t1中存在但在t2中不存在的日期(即缺失的日期)
+        select * 
+        from t1 
+        where not exists (select 1 from t2 where t2.data_date = t1.data_date)
+        """
+        # Log the SQL for debugging.
+        logger.info(f"sql: {sql}")
+        # Run the query.
+        curs.execute(sql)
+        # Fetch all result rows.
+        # NOTE(review): this local name shadows the builtin `list`.
+        list = curs.fetchall()
+        # Collect each missing date into `a`.
+        for x in list:
+            a.append(x.get('data_date'))
+
+# Turn the missing-date list into a one-column DataFrame.
+df = pd.DataFrame(a)
+# Print DataFrame structure (columns, dtypes) for debugging.
+print(df.info())
+# Write the missing dates to an Excel file named after the month.
+df.to_excel(
+    f"{start_date.strftime('%Y%m')}睿行车辆越界报警日统计缺失数据日期.xlsx",
+    index=False,  # omit the index column
+    header=False  # omit the header row
+)

+ 287 - 0
car/car-zu-lin/car_zu_lin.py

@@ -0,0 +1,287 @@
+"""车辆租赁合同数据处理
+"""
+
+import re
+import subprocess
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from loguru import logger
+import pandas as pd
+import paramiko
+
# Send log output to the file a.log (in addition to the default stderr sink).
logger.add(sink='a.log')
ssh_hostname = '172.16.107.4'  # remote host address
ssh_port = 22  # SSH service port
ssh_username = 'app'  # SSH login user
ssh_password = '(l4w0ST_'  # SSH login password; NOTE(review): hard-coded secret
# Remote directory used to archive the source workbook.
remote_dir_path = '/data/history/car/zu-lin/'
# PostgreSQL connection settings; NOTE(review): hard-coded credentials.
db_host = "172.16.107.5"  # database host
db_port = 5432         # database port
db_username = "finance"  # database user
db_password = "Finance@unicom23"  # database password
dbname = "financialdb"       # database name
# Input workbook path.
input_path = 'data.xlsx'
# Output CSV path (consumed by data_import()).
output_path = 'output.csv'
+
+
def data_process():
    """Read the rental-contract workbook, normalise licence-plate numbers and
    owning units, expand every contract over its lease term (one output row
    per month of the term), and write the result to output.csv.
    """
    # Matches any licence-plate province abbreviation (京, 津, 晋, ...); used to
    # decide whether input that failed a full match still looks plate-like.
    has_che_pai_province_pattern = re.compile(
        "[" + re.escape("京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁国防") + "]")

    # Matches every character that can NOT appear in a plate (anything other
    # than province marks, special suffixes, A-Z and digits); used to strip noise.
    not_che_pai_pattern = re.compile(
        "[^京津晋冀蒙辽吉黑沪苏浙皖闽赣鲁豫鄂湘粤桂琼渝川贵云藏陕甘青宁新港澳学挂领试超练警国防A-Z\\d]")

    # Matches a complete plate number: the first alternative covers
    # new-energy plates (D/F + 5 characters), the second regular plates.
    che_pai_pattern = re.compile(
        r"([京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z]"
        r"(([DF]((?![IO])[A-Z0-9](?![IO]))\d{4})|(\d{5}[DF]))|"
        r"[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z][A-Z][A-Z0-9]{4}[A-Z0-9挂学警港澳])"
    )

    # Prefecture-level unit -> keywords of its counties/districts; the keys
    # double as the recognised first-level unit names in get_first_unit().
    er_ji_map = {
        "石家庄": ["鹿泉", "藁城", "栾城", "井陉矿区", "井陉", "无极", "正定", "元氏", "新乐", "晋州", "平山", "灵寿",
                   "赞皇", "赵县", "行唐", "高邑", "辛集", "深泽"],
        "唐山": ["唐山高开区", "迁西", "海港", "开平", "丰南", "滦县", "乐亭", "丰润", "玉田", "古冶", "曹妃甸", "遵化",
                 "滦南", "迁安"],
        "秦皇岛": ["北戴河新区", "北戴河", "山海关", "昌黎", "卢龙", "青龙", "抚宁"],
        "邯郸": ["曲周", "魏县", "馆陶", "磁县", "大名", "鸡泽", "成安", "涉县", "永年", "武安", "峰峰", "广平", "临漳",
                 "邱县", "肥乡"],
        "邢台": ["新河", "南宫", "隆尧", "内邱", "平乡", "宁晋", "广宗", "清河", "临西", "任县", "巨鹿", "沙河", "威县",
                 "临城", "柏乡", "南和"],
        "保定": ["涞水", "蠡县", "顺平", "博野", "安国", "涞源", "唐县", "定州", "高阳", "曲阳", "阜平", "清苑",
                 "高碑店",
                 "满城", "涿州", "易县", "望都", "徐水", "定兴", "白沟"],
        "张家口": ["张北", "崇礼", "康保", "赤城", "阳原", "万全", "下花园", "尚义", "怀安", "怀来", "蔚县", "涿鹿",
                   "沽源",
                   "宣化"],
        "承德": ["承德县", "兴隆", "宽城", "平泉", "营子", "隆化", "滦平", "围场", "丰宁", "双滦"],
        "廊坊": ["文安", "霸州", "大城", "廊坊开发区", "三河", "香河", "永清", "胜芳", "燕郊", "固安", "大厂"],
        "沧州": ["东光", "吴桥", "黄骅", "盐山", "孟村", "泊头", "献县", "南皮", "渤海新区", "海兴", "沧县", "河间",
                 "青县",
                 "任丘", "肃宁"],
        "衡水": ["景县", "阜城", "枣强", "深州", "饶阳", "故城", "武强", "武邑", "冀州", "安平"],
        "雄安": ["容城", "雄县", "安新"]
    }

    # Load the raw workbook.
    df = pd.read_excel(io=input_path)
    # Remove all whitespace inside string cells.
    df = df.map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)
    # Rename columns to the database column names (pinyin of the Chinese headers).
    df.columns = ["year_month", "raw_che_pai_hao_he_tong", "che_xing", "che_liang_suo_shu_dan_wei",
                  "he_tong_ming_cheng", "he_tong_bian_hao", "jia_shui_he_ji_jin_e", "bu_han_shui_jin_e", "shui_e",
                  "zu_qi", "raw_che_pai_hao_ti_huan", "ti_huan_nian_yue", "bei_zhu"]

    # Extract and normalise a plate number from free-form text. Returns "" for
    # empty input; falls back (with a warning) to the cleaned or upper-cased
    # text when no complete plate can be matched.
    def get_che_pai(che_pai):
        # Empty / NaN / whitespace-only input yields an empty plate.
        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
            return ""
        # Normalise to upper case.
        upper_case = che_pai.upper()
        # Strip every character that cannot be part of a plate.
        s = not_che_pai_pattern.sub("", upper_case)
        # Try to match a complete plate in the cleaned text.
        m = che_pai_pattern.search(s)
        if m:
            return m.group(0)

        # No full match, but the text contains a province mark: keep the
        # cleaned text and log a warning.
        if has_che_pai_province_pattern.search(che_pai):
            logger.warning(f"车牌匹配失败: {che_pai} -> {s}")
            return s

        # Nothing plate-like at all: log a warning and keep the upper-cased text.
        logger.warning(f"车牌匹配失败: {che_pai} -> {upper_case}")
        return upper_case

    # Return "1" when no complete plate can be extracted from the text,
    # "0" otherwise (string flags, matching the DB column type).
    def che_pai_fail(che_pai):
        # Empty / NaN / whitespace-only input counts as a failure.
        if pd.isna(che_pai) or not che_pai or not che_pai.strip():
            return "1"

        # Same cleaning as get_che_pai().
        s = not_che_pai_pattern.sub("", che_pai.upper())
        # Try to match a complete plate.
        m = che_pai_pattern.search(s)
        if m:
            return "0"  # matched

        return "1"  # failed

    # Map a free-form owning-unit name to its first-level unit.
    def get_first_unit(unit):
        # Empty / NaN input yields an empty unit.
        if pd.isna(unit) or not unit or not unit.strip():
            return ""
        # Keyword-based special cases come first.
        if "机动通信局" in unit or "机动局" in unit or "传输局" in unit or "线路维护中心" in unit:
            return "机动局"
        if "雄安基地建设部" in unit:
            return "雄安基地建设部"
        if "华北基地建设部" in unit:
            return "华北基地建设部"
        # Otherwise look for a prefecture name inside the unit string.
        for yj in er_ji_map.keys():
            if yj in unit:
                return yj
        return "省公司本部"  # default: provincial headquarters

    # Accumulates one dict per (contract, month) output row.
    result_list = []

    # Process each contract row.
    for _, row in df.iterrows():
        # Contract start period as an integer YYYYMM.
        year_month = row['year_month']
        year_month_integer = int(year_month)
        # Same period as a datetime (first of the month).
        local_date = datetime.strptime(str(year_month_integer), "%Y%m")
        # Normalise the contract plate and record whether it matched.
        raw_che_pai_hao_he_tong = row['raw_che_pai_hao_he_tong']
        che_pai_hao_he_tong = get_che_pai(raw_che_pai_hao_he_tong)
        che_pai_he_tong_fail = che_pai_fail(raw_che_pai_hao_he_tong)
        # Normalise the replacement plate and record whether it matched.
        raw_che_pai_hao_ti_huan = row['raw_che_pai_hao_ti_huan']
        che_pai_hao_ti_huan = get_che_pai(raw_che_pai_hao_ti_huan)
        che_pai_ti_huan_fail = che_pai_fail(raw_che_pai_hao_ti_huan)
        # Derive the first-level unit from the owning unit.
        che_liang_suo_shu_dan_wei = row['che_liang_suo_shu_dan_wei']
        first_unit = get_first_unit(che_liang_suo_shu_dan_wei)
        # Lease term in months.
        zu_qi = row['zu_qi']
        zu_qi_int = int(zu_qi)
        # Replacement period (YYYYMM) if present, else None.
        ti_huan_nian_yue = row['ti_huan_nian_yue']
        ti_huan_nian_yue_integer = int(ti_huan_nian_yue) if pd.notna(ti_huan_nian_yue) else None
        # Effective plate for the first month: the replacement plate once the
        # period has reached the replacement month, otherwise the contract plate.
        che_pai_hao = (
            che_pai_hao_ti_huan
            if ti_huan_nian_yue_integer and year_month_integer >= ti_huan_nian_yue_integer
            else che_pai_hao_he_tong
        )

        # Trace rows that carry a replacement plate.
        if pd.notna(che_pai_hao_ti_huan) and che_pai_hao_ti_huan:
            logger.info("{} -> {} -> {} -> {}", che_pai_hao_he_tong, year_month_integer, ti_huan_nian_yue_integer,
                        ti_huan_nian_yue_integer and year_month_integer >= ti_huan_nian_yue_integer)

        # Output row for the first month of the term: the raw columns plus
        # all derived fields.
        result_dict = {
            **row.to_dict(),
            "che_pai_hao_he_tong": che_pai_hao_he_tong,
            "che_pai_he_tong_fail": che_pai_he_tong_fail,
            "che_pai_hao_ti_huan": che_pai_hao_ti_huan,
            "che_pai_ti_huan_fail": che_pai_ti_huan_fail,
            "first_unit": first_unit,
            "che_pai_hao": che_pai_hao,
            "year_no": local_date.strftime("%Y"),
            "month_no": local_date.strftime("%m"),
        }

        # Record the first month.
        result_list.append(result_dict)
        next_local_date = local_date

        # Generate one row for each remaining month of the lease term.
        for _ in range(1, zu_qi_int):
            # Advance one month (day pinned to 1 by relativedelta).
            next_local_date = next_local_date + relativedelta(months=1, day=1)
            next_month_integer = int(next_local_date.strftime("%Y%m"))

            # Effective plate for this month (same rule as above).
            next_che_pai_hao = (
                che_pai_hao_ti_huan
                if ti_huan_nian_yue_integer is not None and next_month_integer >= ti_huan_nian_yue_integer
                else che_pai_hao_he_tong
            )

            # Copy of the first-month row with the period fields updated.
            next_result_dict = {
                **result_dict,
                "year_month": str(next_month_integer),
                "year_no": next_local_date.strftime("%Y"),
                "month_no": next_local_date.strftime("%m"),
                "che_pai_hao": next_che_pai_hao,
            }

            # Record this month.
            result_list.append(next_result_dict)

    # Convert the expanded rows to a DataFrame.
    result_df = pd.DataFrame(result_list)
    # Nullable integer dtype so empty replacement periods stay NA, not float.
    result_df['ti_huan_nian_yue'] = result_df['ti_huan_nian_yue'].astype('Int64')
    # Print DataFrame info for debugging.
    print(result_df.info())

    # Write the result as UTF-8-with-BOM CSV (Excel-friendly).
    result_df.to_csv(path_or_buf=output_path,
                     index=False,
                     encoding='utf-8-sig')
+
+
def data_import():
    """Load output.csv into PostgreSQL by running the copy.ps1 helper (psql \\COPY).

    Raises:
        RuntimeError: if the psql "COPY <n>" row count cannot be found in stdout.
    """
    # Path of the PowerShell helper that wraps psql \COPY.
    script_path = r"../../copy.ps1"
    # Target table for the import.
    table = "car.car_zu_lin"
    # Column order; must match the CSV produced by data_process().
    columns = "year_month,raw_che_pai_hao_he_tong,che_xing,che_liang_suo_shu_dan_wei,he_tong_ming_cheng,he_tong_bian_hao,jia_shui_he_ji_jin_e,bu_han_shui_jin_e,shui_e,zu_qi,raw_che_pai_hao_ti_huan,ti_huan_nian_yue,bei_zhu,che_pai_hao_he_tong,che_pai_he_tong_fail,che_pai_hao_ti_huan,che_pai_ti_huan_fail,first_unit,che_pai_hao,year_no,month_no"
    # Build the command as an argument list: no shell string parsing, so values
    # containing spaces or special characters cannot break the command line
    # (the original f-string form also fails on POSIX, where a string arg with
    # shell=False is treated as a single program name).
    command = [
        "powershell", "-File", script_path,
        "-db_host", db_host,
        "-db_port", str(db_port),
        "-db_username", db_username,
        "-db_password", db_password,
        "-dbname", dbname,
        "-table", table,
        "-filename", output_path,
        "-columns", columns,
    ]
    # Log the command for debugging.
    logger.info("command: {}", command)
    # Run PowerShell and capture its output.
    completed_process = subprocess.run(
        command,  # command to execute
        check=False,  # inspect the result ourselves instead of raising
        text=True,  # decode stdout/stderr to str
        capture_output=True,  # capture stdout and stderr
    )
    # Log return code, stdout and stderr.
    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
                completed_process.stderr)
    # psql prints "COPY <n>" on success; search every line of stdout for it
    # (the original anchored match broke if psql emitted anything first).
    matcher = re.search(r"^COPY (\d+)$", completed_process.stdout, re.MULTILINE)
    count = int(matcher.group(1)) if matcher else None  # imported row count
    # No row count found means the import failed.
    if count is None:
        raise RuntimeError("导入数据失败")
+
+
def upload_file():
    """Archive the source workbook as <remote_dir_path><YYYYMM>.xlsx on the remote host."""
    # First day of the previous month determines the archive file name.
    today = datetime.today()
    start_date = today - relativedelta(months=1, day=1)
    # Bug fix: the original nested single quotes inside a single-quoted
    # f-string, which is a SyntaxError before Python 3.12; use double quotes.
    remote_path = f"{remote_dir_path}{start_date.strftime('%Y%m')}.xlsx"
    # SSH client managed by a with-block so the connection is always closed.
    with paramiko.SSHClient() as ssh:
        # Auto-accept unknown host keys so first connections do not fail.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        # Connect with host, port, user and password.
        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
        # Create the remote directory if missing. exec_command is asynchronous,
        # so wait for its exit status — otherwise sftp.put below can race
        # against the mkdir and fail on a not-yet-existing directory.
        _, stdout, _ = ssh.exec_command(f'mkdir -p {remote_dir_path}')
        stdout.channel.recv_exit_status()
        # SFTP session for the transfer, also context-managed.
        with ssh.open_sftp() as sftp:
            # Log source and destination before uploading.
            logger.info("upload {} to {}", input_path, remote_path)
            # Upload the local file.
            sftp.put(input_path, remote_path)
            # Log completion.
            logger.info("uploaded {}", input_path)
+
+
# Run the full pipeline only when executed as a script, so importing this
# module for reuse does not trigger the processing/import/upload side effects.
if __name__ == "__main__":
    data_process()
    data_import()
    upload_file()

+ 13 - 0
copy.ps1

@@ -0,0 +1,13 @@
param (
    [string]$db_host,
    [int]$db_port,
    [string]$db_username,
    [string]$db_password,
    [string]$dbname,
    [string]$table,
    [string]$filename,
    [string]$columns
)
# psql reads the password from the environment, keeping it off the command line.
$env:PGPASSWORD = $db_password
# Build the \COPY meta-command, then invoke psql directly with the call
# operator (&) and an argument list. This avoids Invoke-Expression, which
# re-parses the interpolated string and is vulnerable to quoting breakage
# and command injection through parameter values.
$copyCommand = "\COPY $table ($columns) FROM '$filename' WITH (FORMAT csv, HEADER true, ENCODING 'UTF8');"
& psql -h $db_host -p $db_port -U $db_username -d $dbname -c $copyCommand

+ 106 - 0
doc/sql.md

@@ -0,0 +1,106 @@
+# sql
+
+## 车辆黄皮书越界
+
+```sql
+with t101 as (
+select
+    TO_CHAR(CURRENT_DATE - INTERVAL '1 month', 'YYYYMM') as year_month,
+    first_unit,
+    count(1) as total,
+    count(shi_fou_zhi_xing_zhong_yue_jie = '是' or null) as ls,
+    count(shi_fou_zhi_xing_zhong_yue_jie = '否' or null) as wg
+from
+    car.car_yue_jie
+where
+    year_month = TO_CHAR(CURRENT_DATE - INTERVAL '1 month', 'YYYYMM')::int4
+group by
+    first_unit
+),
+t102 as (
+select
+    year_month,
+    car.car_second_unit_sort.second_unit,
+    coalesce(total, 0) as total,
+    null as bjx,
+    coalesce(ls, 0) as ls,
+    coalesce(wg, 0) as wg
+from
+    t101
+right join car.car_second_unit_sort on
+    t101.first_unit = car.car_second_unit_sort.second_unit
+where
+    car_second_unit_sort.second_unit in ('石家庄', '唐山', '秦皇岛', '邯郸', '邢台', '保定', '张家口', '承德', '廊坊', '沧州', '衡水', '雄安', '机动局', '省公司本部')
+order by
+    car.car_second_unit_sort.sort desc
+)
+select
+    *
+from
+    t102
+```
+
+## 车辆黄皮书费用
+
+```sql
+with
+t101 as (
+select
+    TO_CHAR(CURRENT_DATE - INTERVAL '1 month', 'YYYYMM') as month_id,
+    second_org_no,
+    second_org_name,
+    round(sum(case when kpi_id in ('KPI_001', 'KPI_002', 'KPI_004', 'KPI_005', 'KPI_006', 'KPI_007', 'KPI_008', 'KPI_009', 'KPI_010', 'KPI_011', 'KPI_013', 'KPI_014', 'KPI_015', 'KPI_016', 'KPI_017', 'KPI_018', 'KPI_019', 'KPI_020', 'KPI_021', 'KPI_022', 'KPI_023') then ty_value else 0 end) / 10000, 2) as ty_fy,
+    round(sum(case when kpi_id in ('KPI_001', 'KPI_002', 'KPI_004', 'KPI_005', 'KPI_006', 'KPI_007', 'KPI_008', 'KPI_009', 'KPI_010', 'KPI_011', 'KPI_013', 'KPI_014', 'KPI_015', 'KPI_016', 'KPI_017', 'KPI_018', 'KPI_019', 'KPI_020', 'KPI_021', 'KPI_022', 'KPI_023') then ly_value else 0 end) / 10000, 2) as ly_fy
+from
+    car.car_fee_hb
+where
+    second_org_no is not null
+    and month_id = TO_CHAR(CURRENT_DATE - INTERVAL '1 month', 'YYYYMM')::int4
+group by
+    second_org_no,
+    second_org_name
+),
+t102 as (
+select
+    t101.*,
+    case
+        when ly_fy = 0 then null
+        else round((ty_fy - ly_fy) / ly_fy * 100, 2)
+    end as tb
+from
+    t101
+join car.car_second_unit_sort c on
+    t101.second_org_name = c.second_unit
+order by
+    c.sort desc
+)
+select
+    *
+from
+    t102
+```
+
+## 查询接口日志
+
+```sql
+select
+    *
+from
+    common.request_log
+where
+    login_id != 'test'
+order by
+    request_time desc
+```
+
+## 查询数据处理日志
+
+```sql
+select
+    *
+from
+    common.data_log
+order by
+    create_time desc
+```
+

+ 332 - 0
house/house-abnormal-data/house_abnormal_data.py

@@ -0,0 +1,332 @@
+"""不动产异常数据稽核数据处理
+"""
+
+import os  # 导入os模块,用于处理文件和目录操作
+import re  # 导入re模块,用于正则表达式操作
+from loguru import logger  # 导入loguru模块,用于日志记录
+import pandas as pd  # 导入pandas模块,用于数据处理和分析
+import psycopg  # 导入psycopg模块,用于连接PostgreSQL数据库
+from datetime import datetime  # 导入datetime模块,用于日期和时间操作
+from dateutil.relativedelta import relativedelta  # 导入relativedelta模块,用于日期的相对计算
+import xlwings as xw  # 导入xlwings模块,用于操作Excel文件
+import subprocess
+import paramiko
+
# Send log output to the file a.log (in addition to the default stderr sink).
logger.add(sink='a.log')
ssh_hostname = '172.16.107.4'  # remote host address
ssh_port = 22  # SSH service port
ssh_username = 'app'  # SSH login user
ssh_password = '(l4w0ST_'  # SSH login password; NOTE(review): hard-coded secret
# Remote directory used to archive the source files.
remote_dir_path = '/data/history/house/abnormal-data/'
# PostgreSQL connection settings; NOTE(review): hard-coded credentials.
db_host = "172.16.107.5"  # database host
db_port = 5432         # database port
db_username = "finance"  # database user
db_password = "Finance@unicom23"  # database password
dbname = "financialdb"       # database name
conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
# Hidden Excel instance used by xlwings for the .xls -> .xlsx conversion.
# NOTE(review): never closed here; see the script entry point.
app = xw.App(visible=False)
# Current date and time.
today = datetime.today()
# First day of the previous month: the reporting period.
start_date = today - relativedelta(months=1, day=1)
# Reporting period formatted as YYYYMM.
year_month = start_date.strftime('%Y%m')
# Input and output directories for the Excel files.
input_dir = 'data/'
output_dir = 'output/'
# Output CSV path (consumed by data_import()).
output_path = 'output.csv'
# Append the reporting period to the remote archive directory.
remote_dir_path = f"{remote_dir_path}{year_month}/"
+
+
def xls_to_xlsx():
    """Convert every .xls workbook in input_dir to output_dir as <year_month>_<suffix>.xlsx."""
    # Ensure the output directory exists (idiomatic replacement for
    # exists()+makedirs()).
    os.makedirs(output_dir, exist_ok=True)

    # Walk the input directory, converting only .xls files.
    for filename in os.listdir(input_dir):
        if not filename.endswith(".xls"):
            continue
        file_path = os.path.join(input_dir, filename)  # full source path
        workbook = app.books.open(file_path)  # open in the hidden Excel instance
        try:
            # Replace the leading non-digit prefix plus its digits with
            # "<year_month>_", and switch the extension to .xlsx.
            pattern = r'\D+(\d+)'  # non-digit run followed by digits
            new_file_name = re.sub(pattern, year_month + '_', filename).replace(".xls", ".xlsx")
            # Save the converted workbook into the output directory.
            workbook.save(os.path.join(output_dir, new_file_name))
            # Bug fix: log the actual source file name instead of the literal
            # placeholder "(unknown)".
            logger.info(f"{filename} -> {new_file_name}")
        finally:
            # Always release the workbook, even if save/rename fails,
            # so Excel does not accumulate open books.
            workbook.close()
+
+
def data_process():
    """Merge the converted .xlsx audit files, map each row to its second-level
    area and third-level city using common.organization, and write output.csv.
    """
    # org id -> org row, and second-level org id -> list of its child orgs.
    org_map = {}
    third_org_list_map = {}

    # Fetch organisation data from PostgreSQL.
    with psycopg.connect(
            conninfo=conn_info,
            row_factory=psycopg.rows.dict_row  # rows come back as dicts
    ) as conn:
        with conn.cursor() as curs:
            # Second-level organisations (grade = 1).
            sql = """
            select * from common.organization where grade = 1
            """
            logger.info(f"sql: {sql}")  # log the SQL
            curs.execute(sql)
            second_orgs = curs.fetchall()
            # Start every second-level org with an empty child list.
            for x in second_orgs:
                third_org_list_map[x['id']] = []
            # All organisations.
            sql = """
            select * from common.organization
            """
            logger.info(f"sql: {sql}")  # log the SQL
            curs.execute(sql)
            orgs = curs.fetchall()
            # Build the id->org map and attach children to their parents.
            for x in orgs:
                if x['parent_id'] in third_org_list_map:
                    third_org_list_map[x['parent_id']].append(x)
                org_map[x['id']] = x

    files = os.listdir(output_dir)
    file_list = list(filter(lambda x: x.endswith('.xlsx'), files))  # only converted workbooks
    logger.info('file_list: {}', file_list)  # log the file list

    # One DataFrame per source file.
    data_list = []

    # Read each workbook; file names are "<year_month>_<city>.xlsx".
    for t in file_list:
        logger.info(f'{t}')  # current file
        ny = t.replace('.xlsx', '').split('_')[0]  # period (YYYYMM) from the name
        ds = t.replace('.xlsx', '').split('_')[1]  # city from the name
        tmp = pd.read_excel(output_dir + t, skiprows=8, header=None)  # data starts after 8 header rows
        if '省本部' in ds or '省公司' in ds:
            # Provincial HQ files carry a single data row.
            tmp = pd.read_excel(output_dir + t, skiprows=8, header=None, nrows=1)
        tmp['年月'] = ny  # period column
        tmp['地市'] = ds  # city column
        tmp['source'] = t  # provenance column
        data_list.append(tmp)  # collect

    # Stack all files into one DataFrame.
    df = pd.concat(data_list)

    # Remove all whitespace inside string cells.
    df = df.map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)

    # Database column names, in the positional order of the workbook columns.
    df.columns = ['third_unit', 'wei_guan_lian_tu_di_zheng', 'wei_guan_lian_fang_chan_zheng', 'wei_guan_lian_ju_zhi',
                  'jian_zhu_tu_di_ju_zhi_bu_dui_ying', 'tu_di_ju_zhi_bian_hao_bu_cun_zai',
                  'jian_zhu_ju_zhi_bian_hao_bu_cun_zai', 'jian_zhu_tu_di_biao_qian_hao_bu_cun_zai',
                  'dai_guan_lian_tu_di', 'dai_guan_lian_jian_zhu', 'ju_zhi_gte_three_tu_di', 'ju_zhi_gt_ten_tu_di',
                  'ju_zhi_gte_five_jian_zhu', 'ju_zhi_gte_ten_jian_zhu', 'tu_di_gte_seven_jian_zhu',
                  'tu_di_gte_ten_jian_zhu', 'tu_di_zheng_xia_ci_kong_bai', 'fang_chan_zheng_xia_ci_kong_bai',
                  'ju_zhi_di_duan_kong_bai', 'xian_zhi_jian_zhu_mian_ji', 'xian_zhi_tu_di_mian_ji',
                  'wu_jian_zhu_fei_xian_zhi_tu_di_mian_ji', 'tu_di_shi_yong_quan_qi_ta', 'year_month', 'second_unit',
                  'source']

    # Resolve the second-level area id from the unit names. Special codes:
    # '-11' for the transmission/mobile-comms bureau, '782' for the Xiong'an
    # counties under Baoding, '-12' when nothing matches.
    def get_area_no(x):
        second_unit = x['second_unit']
        third_unit = x['third_unit']
        if '长途局' in second_unit or '长途通信传输局' in second_unit or '机动局' in second_unit or '传输局' in second_unit:
            return '-11'
        if '保定' in second_unit and ('雄县' in third_unit or '容城' in third_unit or '安新' in third_unit):
            return '782'
        for second_org in second_orgs:
            area_name = second_org['name']
            area_no = second_org['id']
            if area_name in second_unit:
                return area_no
        return '-12'

    # Derive the area_no column.
    df['area_no'] = df.apply(get_area_no, axis=1)

    # Look up the area name for an area id.
    def get_area_name(x):
        area_no = x['area_no']
        second_org = org_map[area_no]
        area_name = second_org['name']
        return area_name

    # Derive the area_name column.
    df['area_name'] = df.apply(get_area_name, axis=1)

    # Resolve the third-level city id from unit + area. Hard-coded ids handle
    # spelling variants and special districts before falling back to the
    # organisation table, then to a per-area default.
    def get_city_no(x):
        third_unit = x['third_unit']
        area_name = x['area_name']
        area_no = x['area_no']
        if area_name == '石家庄':
            if '矿区' in third_unit:
                return 'D0130185'
            if '井陉' in third_unit:
                return 'D0130121'
        if area_name == '秦皇岛':
            if '北戴河新区' in third_unit:
                # NOTE(review): 'D0130185' is also used for Shijiazhuang 矿区
                # above — possibly a copy-paste slip; confirm the intended id.
                return 'D0130185'
            if '北戴河' in third_unit:
                return 'D0130304'
        if area_name == '唐山':
            if '滦县' in third_unit:
                return 'D0130223'
            if '高新技术开发区' in third_unit:
                return 'D0130205'
        if area_name == '邢台':
            if '内丘' in third_unit:
                return 'D0130523'
            if '任泽' in third_unit:
                return 'D0130526'
        if area_name == '邯郸':
            if '峰峰' in third_unit:
                return 'D0130406'
        if area_name == '省机动局':
            # Mobile-comms bureau branches keyed by the city in the unit name.
            if '沧州' in third_unit:
                return 'HECS180'
            if '唐山' in third_unit:
                return 'HECS181'
            if '秦皇岛' in third_unit:
                return 'HECS182'
            if '廊坊' in third_unit:
                return 'HECS183'
            if '张家口' in third_unit:
                return 'HECS184'
            if '邢台' in third_unit:
                return 'HECS185'
            if '邯郸' in third_unit:
                return 'HECS186'
            if '保定' in third_unit:
                return 'HECS187'
            if '石家庄' in third_unit:
                return 'HECS188'
            if '承德' in third_unit:
                return 'HECS189'
            if '衡水' in third_unit:
                return 'HECS720'
            if '雄安' in third_unit:
                return 'HECS728'
            return 'HECS018'
        if '雄安' == area_name:
            # Strip the prefix so the child-org lookup below can match.
            third_unit = third_unit.replace('雄安新区', '')
        third_org_list = third_org_list_map[area_no]
        # Try the organisation table: child org whose name appears in the unit.
        for third_org in third_org_list:
            city_name = third_org['name']
            if city_name in third_unit:
                return third_org['id']
        # Fallback: per-area default city id.
        if '沧州' == area_name:
            return 'D0130911'
        if '唐山' == area_name:
            return 'D0130202'
        if '秦皇岛' == area_name:
            return 'D0130302'
        if '廊坊' == area_name:
            return 'D0131000'
        if '张家口' == area_name:
            return 'D0130701'
        if '邢台' == area_name:
            return 'D0130502'
        if '邯郸' == area_name:
            return 'D0130402'
        if '保定' == area_name:
            return 'D0130601'
        if '石家庄' == area_name:
            return 'D0130186'
        if '承德' == area_name:
            return 'D0130801'
        if '衡水' == area_name:
            return 'D0133001'
        if '雄安' == area_name:
            return 'D0130830'
        return 'HE001'

    # Derive the city_no column.
    df['city_no'] = df.apply(get_city_no, axis=1)

    # Look up the city name for a city id.
    def get_city_name(x):
        city_no = x['city_no']
        third_org = org_map[city_no]
        city_name = third_org['name']
        return city_name

    # Derive the city_name column.
    df['city_name'] = df.apply(get_city_name, axis=1)

    # Print DataFrame info for debugging.
    print(df.info())

    # Write the result as UTF-8-with-BOM CSV (Excel-friendly).
    df.to_csv(path_or_buf=output_path,
              index=False,
              encoding='utf-8-sig')
+
+
def data_import():
    """Load output.csv into PostgreSQL by running the copy.ps1 helper (psql \\COPY).

    Raises:
        RuntimeError: if the psql "COPY <n>" row count cannot be found in stdout.
    """
    # Path of the PowerShell helper that wraps psql \COPY.
    script_path = r"../../copy.ps1"
    # Target table for the import.
    table = "house.abnormal_data"
    # Column order; must match the CSV produced by data_process().
    columns = "third_unit,wei_guan_lian_tu_di_zheng,wei_guan_lian_fang_chan_zheng,wei_guan_lian_ju_zhi,jian_zhu_tu_di_ju_zhi_bu_dui_ying,tu_di_ju_zhi_bian_hao_bu_cun_zai,jian_zhu_ju_zhi_bian_hao_bu_cun_zai,jian_zhu_tu_di_biao_qian_hao_bu_cun_zai,dai_guan_lian_tu_di,dai_guan_lian_jian_zhu,ju_zhi_gte_three_tu_di,ju_zhi_gt_ten_tu_di,ju_zhi_gte_five_jian_zhu,ju_zhi_gte_ten_jian_zhu,tu_di_gte_seven_jian_zhu,tu_di_gte_ten_jian_zhu,tu_di_zheng_xia_ci_kong_bai,fang_chan_zheng_xia_ci_kong_bai,ju_zhi_di_duan_kong_bai,xian_zhi_jian_zhu_mian_ji,xian_zhi_tu_di_mian_ji,wu_jian_zhu_fei_xian_zhi_tu_di_mian_ji,tu_di_shi_yong_quan_qi_ta,year_month,second_unit,source,area_no,area_name,city_no,city_name"
    # Build the command as an argument list: no shell string parsing, so values
    # containing spaces or special characters cannot break the command line
    # (the original f-string form also fails on POSIX, where a string arg with
    # shell=False is treated as a single program name).
    command = [
        "powershell", "-File", script_path,
        "-db_host", db_host,
        "-db_port", str(db_port),
        "-db_username", db_username,
        "-db_password", db_password,
        "-dbname", dbname,
        "-table", table,
        "-filename", output_path,
        "-columns", columns,
    ]
    # Log the command for debugging.
    logger.info("command: {}", command)
    # Run PowerShell and capture its output.
    completed_process = subprocess.run(
        command,  # command to execute
        check=False,  # inspect the result ourselves instead of raising
        text=True,  # decode stdout/stderr to str
        capture_output=True,  # capture stdout and stderr
    )
    # Log return code, stdout and stderr.
    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
                completed_process.stderr)
    # psql prints "COPY <n>" on success; search every line of stdout for it
    # (the original anchored match broke if psql emitted anything first).
    matcher = re.search(r"^COPY (\d+)$", completed_process.stdout, re.MULTILINE)
    count = int(matcher.group(1)) if matcher else None  # imported row count
    # No row count found means the import failed.
    if count is None:
        raise RuntimeError("导入数据失败")
+
+
def upload_file():
    """Upload every file in input_dir to the remote history directory via SFTP."""
    # SSH client managed by a with-block so the connection is always closed.
    with paramiko.SSHClient() as ssh:
        # Auto-accept unknown host keys so first connections do not fail.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        # Connect with host, port, user and password.
        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
        # Create the remote directory if missing. exec_command is asynchronous,
        # so wait for its exit status — otherwise the uploads below can race
        # against the mkdir and fail on a not-yet-existing directory.
        _, stdout, _ = ssh.exec_command(f'mkdir -p {remote_dir_path}')
        stdout.channel.recv_exit_status()
        # SFTP session for the transfers, also context-managed.
        with ssh.open_sftp() as sftp:
            for filename in os.listdir(input_dir):
                local_path = os.path.join(input_dir, filename)
                # Bug fix: build the remote path from the actual file name; the
                # original used the literal placeholder "(unknown)", which made
                # every upload overwrite the same remote target.
                remote_path = f'{remote_dir_path}{filename}'
                if os.path.isfile(local_path):
                    # Log source and destination before uploading.
                    logger.info("upload {} to {}", local_path, remote_path)
                    # Upload the local file.
                    sftp.put(local_path, remote_path)
                    # Log completion.
                    logger.info("uploaded {}", local_path)
+
+
# Run the full pipeline only when executed as a script, so importing this
# module for reuse does not trigger the side effects; also make sure the
# hidden Excel instance created at module level is shut down afterwards.
if __name__ == "__main__":
    try:
        xls_to_xlsx()
        data_process()
        data_import()
        upload_file()
    finally:
        app.quit()  # release the xlwings Excel process

+ 767 - 0
house/house-building/house_building.py

@@ -0,0 +1,767 @@
+"""不动产建筑数据处理
+"""
+
+# 导入必要的库
+import re  # 正则表达式库,用于字符串处理
+from datetime import datetime  # 日期时间库,用于处理日期和时间
+from dateutil.relativedelta import relativedelta  # 日期时间相对偏移库,用于计算相对日期
+from decimal import Decimal  # 高精度小数库,用于精确的数值计算
+
+from loguru import logger  # 日志库,用于记录日志信息
+import pandas as pd  # 数据分析库,用于处理数据框
+import psycopg  # PostgreSQL数据库连接库,用于与PostgreSQL交互
+import subprocess
+import paramiko
+
+# Route log output to the file a.log (in addition to the default stderr sink).
+logger.add(sink='a.log')
+# NOTE(review): the SSH and database credentials below are hard-coded in plain
+# text — consider moving them to environment variables or a secrets store.
+ssh_hostname = '172.16.107.4'  # remote archive host
+ssh_port = 22  # SSH port
+ssh_username = 'app'  # SSH login user
+ssh_password = '(l4w0ST_'  # SSH login password
+# Destination directory for archived spreadsheets on the remote server.
+remote_dir_path = '/data/history/house/building/'
+# PostgreSQL connection settings.
+db_host = "172.16.107.5"  # database host
+db_port = 5432         # database port
+db_username = "finance"  # database user
+db_password = "Finance@unicom23"  # database password
+dbname = "financialdb"       # database name
+conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
+# Reporting period: the first day of the previous month, formatted as YYYYMM.
+today = datetime.today()
+start_date = today - relativedelta(months=1, day=1)
+year_month = start_date.strftime('%Y%m')
+# Input spreadsheet path.
+input_path = 'data.xlsx'
+# Output CSV path (consumed by data_import()).
+output_path = 'output.csv'
+
+
+def data_process():
+    """Enrich the building spreadsheet and write it as CSV for database import.
+
+    Loads organization and area reference data from PostgreSQL, derives
+    organization codes and city/district ids for every row of ``input_path``,
+    adds period/derived columns, and writes the reordered result to
+    ``output_path`` as UTF-8-with-BOM CSV with English column headers.
+    """
+    # Lookup tables built from database reference data.
+    org_map = {}  # organization id -> organization row
+    third_org_list_map = {}  # second-level org id -> list of its third-level orgs
+    area_map = {}  # area id -> area row
+    districts_list_map = {}  # city area id -> list of its district rows
+
+    # Connect to PostgreSQL and fetch organization / area reference data.
+    with psycopg.connect(
+            conninfo=conn_info,
+            row_factory=psycopg.rows.dict_row
+    ) as conn:
+        with conn.cursor() as curs:
+            # All second-level organizations (grade = 1).
+            sql = """
+            select * from common.organization where grade = 1
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            second_orgs = curs.fetchall()
+            for x in second_orgs:
+                third_org_list_map[x['id']] = []  # start each child list empty
+
+            # All organizations.
+            sql = """
+            select * from common.organization
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            orgs = curs.fetchall()
+            for x in orgs:
+                if x['parent_id'] in third_org_list_map:
+                    third_org_list_map[x['parent_id']].append(x)  # attach to its second-level parent
+                org_map[x['id']] = x  # index every organization by id
+
+            # All top-level areas (area_grade = 1); used as the city list below.
+            sql = """
+            select * from common.area where area_grade = 1 order by area_id
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            cities = curs.fetchall()
+            for x in cities:
+                districts_list_map[x['area_id']] = []  # start each district list empty
+
+            # All areas.
+            sql = """
+            select * from common.area
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            areas = curs.fetchall()
+            for x in areas:
+                if x['parent_id'] in districts_list_map:
+                    districts_list_map[x['parent_id']].append(x)  # attach district to its city
+                area_map[x['area_id']] = x  # index every area by id
+
+    # Read the Excel data and normalize it.
+    df = pd.read_excel(io=input_path)  # load the spreadsheet
+    df = df.map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)  # strip all whitespace inside string cells
+    df.drop_duplicates(subset=['建筑ID'], keep='last', inplace=True)  # de-duplicate by building id, keeping the last row
+
+    # Derive the second-level organization code from the owning units.
+    def get_area_no(x):
+        second_unit = x['资产所属单位(二级)']
+        third_unit = x['资产所属单位(三级)']
+        if '长途通信传输局' == second_unit:
+            return '-11'
+        # Xiong'an counties still administered under Baoding map to org 782.
+        if '保定' in second_unit and ('雄县' in third_unit or '容城' in third_unit or '安新' in third_unit):
+            return '782'
+        for second_org in second_orgs:
+            area_name = second_org['name']
+            area_no = second_org['id']
+            if area_name in second_unit:
+                return area_no
+        return '-12'  # fallback code for an unrecognized unit
+
+    df['二级组织机构编码'] = df.apply(get_area_no, axis=1)  # second-level org code column
+
+    # Resolve the second-level organization name from its code.
+    def get_area_name(x):
+        area_no = x['二级组织机构编码']
+        second_org = org_map[area_no]
+        area_name = second_org['name']
+        return area_name
+
+    df['二级组织机构名称'] = df.apply(get_area_name, axis=1)  # second-level org name column
+
+    # Derive the third-level organization code: hard-coded special cases first,
+    # then a name match against the parent org's children, then per-city defaults.
+    def get_city_no(x):
+        third_unit = x['资产所属单位(三级)']
+        area_name = x['二级组织机构名称']
+        area_no = x['二级组织机构编码']
+        if area_name == '石家庄':
+            if '矿区' in third_unit:
+                return 'D0130185'
+            if '井陉' in third_unit:
+                return 'D0130121'
+        if area_name == '秦皇岛':
+            # NOTE(review): 'D0130185' is also returned for the Shijiazhuang
+            # 矿区 branch above — confirm this code is correct for 北戴河新区.
+            if '北戴河新区' in third_unit:
+                return 'D0130185'
+            if '北戴河' in third_unit:
+                return 'D0130304'
+        if area_name == '唐山':
+            if '滦县' in third_unit:
+                return 'D0130223'
+            if '高新技术开发区' in third_unit:
+                return 'D0130205'
+        if area_name == '邢台':
+            if '内丘' in third_unit:
+                return 'D0130523'
+            if '任泽' in third_unit:
+                return 'D0130526'
+        if area_name == '邯郸':
+            if '峰峰' in third_unit:
+                return 'D0130406'
+        if area_name == '省机动局':
+            if '沧州' in third_unit:
+                return 'HECS180'
+            if '唐山' in third_unit:
+                return 'HECS181'
+            if '秦皇岛' in third_unit:
+                return 'HECS182'
+            if '廊坊' in third_unit:
+                return 'HECS183'
+            if '张家口' in third_unit:
+                return 'HECS184'
+            if '邢台' in third_unit:
+                return 'HECS185'
+            if '邯郸' in third_unit:
+                return 'HECS186'
+            if '保定' in third_unit:
+                return 'HECS187'
+            if '石家庄' in third_unit:
+                return 'HECS188'
+            if '承德' in third_unit:
+                return 'HECS189'
+            if '衡水' in third_unit:
+                return 'HECS720'
+            if '雄安' in third_unit:
+                return 'HECS728'
+            return 'HECS018'
+        if '雄安' == area_name:
+            third_unit = third_unit.replace('雄安新区', '')
+        third_org_list = third_org_list_map[area_no]
+        for third_org in third_org_list:
+            city_name = third_org['name']
+            if city_name in third_unit:
+                return third_org['id']
+        # Per-city default codes when no child org name matched.
+        if '沧州' == area_name:
+            return 'D0130911'
+        if '唐山' == area_name:
+            return 'D0130202'
+        if '秦皇岛' == area_name:
+            return 'D0130302'
+        if '廊坊' == area_name:
+            return 'D0131000'
+        if '张家口' == area_name:
+            return 'D0130701'
+        if '邢台' == area_name:
+            return 'D0130502'
+        if '邯郸' == area_name:
+            return 'D0130402'
+        if '保定' == area_name:
+            return 'D0130601'
+        if '石家庄' == area_name:
+            return 'D0130186'
+        if '承德' == area_name:
+            return 'D0130801'
+        if '衡水' == area_name:
+            return 'D0133001'
+        if '雄安' == area_name:
+            return 'D0130830'
+        return 'HE001'
+
+    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)  # third-level org code column
+
+    # Resolve the third-level organization name from its code.
+    def get_city_name(x):
+        city_no = x['三级组织机构编码']
+        third_org = org_map[city_no]
+        city_name = third_org['name']
+        return city_name
+
+    df['三级组织机构名称'] = df.apply(get_city_name, axis=1)  # third-level org name column
+
+    # Derive the administrative city id from the unit names and the address.
+    def get_city_id(x):
+        address = x['标准地址']
+        second_unit = x['资产所属单位(二级)']
+        third_unit = x['资产所属单位(三级)']
+        if '雄安' in address or ('保定' in address and ('雄县' in address or '容城' in address or '安新' in address)):
+            return '133100'
+        for city in cities:
+            area_name = city['short_name']
+            area_id = city['area_id']
+            if area_name in second_unit:
+                return area_id
+            if area_name in third_unit:
+                return area_id
+            if area_name in address:
+                return area_id
+        return ''
+
+    df['city_id'] = df.apply(get_city_id, axis=1)  # city id column
+
+    # Resolve the city name from its id.
+    def get_city(x):
+        city_id = x['city_id']
+        area = area_map.get(city_id)
+        if pd.notna(area):
+            city = area['area_name']
+            return city
+        return ''
+
+    df['city'] = df.apply(get_city, axis=1)  # city name column
+
+    # Derive the district id from the standard address.
+    def get_district_id(x):
+        address = x['标准地址']
+        city = x['city']
+        city_id = x['city_id']
+        if pd.isna(city) or pd.isna(address):
+            return ''
+        if city == '石家庄':
+            if '矿区' in address:
+                return '130107'
+            if '井陉' in address:
+                return '130121'
+        if city == '唐山':
+            if '滦县' in address:
+                return '130284'
+        if city == '邢台':
+            if '内邱' in address:
+                return '130523'
+            if '任县' in address:
+                return '130505'
+        if city == '雄安':
+            address = address.replace('雄安新区', '')
+        districts = districts_list_map.get(city_id)
+        if not districts:
+            return ''
+        for district in districts:
+            district_name = district['short_name']
+            if district_name in address:
+                return district['area_id']
+        return ''
+
+    df['district_id'] = df.apply(get_district_id, axis=1)  # district id column
+
+    # Resolve the district name from its id.
+    def get_district(x):
+        district_id = x['district_id']
+        area = area_map.get(district_id)
+        if pd.notna(area):
+            district = area['area_name']
+            return district
+        return ''
+
+    df['district'] = df.apply(get_district, axis=1)  # district name column
+
+    # Convert percentage strings such as '85%' into decimal fractions.
+    def convert_percentage_to_number(x):
+        if pd.notna(x) and isinstance(x, str) and x.endswith('%'):
+            return Decimal(x[:-1]) / Decimal('100')
+        return x
+
+    df['得房率'] = df['得房率'].apply(convert_percentage_to_number)  # housing acquisition rate as a decimal
+    df['year_no'] = start_date.year  # reporting year
+    df['month_no'] = start_date.month  # reporting month
+
+    # Coerce a value to int, or '' when it is not a valid integer.
+    def get_int(x):
+        try:
+            return int(x)
+        except Exception:
+            return ""
+
+    df['房龄开始年份'] = df['房龄开始年份'].apply(get_int)
+
+    # Compute the building age (in years) from its starting year.
+    def get_house_age(x):
+        house_year_began = x['房龄开始年份']
+        if pd.notna(house_year_began) and house_year_began:
+            current_year = start_date.year
+            return current_year - house_year_began
+        return ''
+
+    df['house_age'] = df.apply(get_house_age, axis=1)  # building age column
+    df.insert(0, '年月', year_month)  # prepend the reporting year-month column
+
+    # NOTE(review): DataFrame.info() prints to stdout and returns None, so this
+    # statement also prints a stray 'None' line.
+    print(df.info())
+
+    # Write the result as CSV with English column headers (BOM for Excel).
+    df.to_csv(
+        path_or_buf=output_path,
+        index=False,
+        header=[
+            'year_month', 'first_unit', 'second_unit', 'third_unit', 'building_name', 'building_id',
+            'housing_acquisition_rate', 'site_name', 'site_id', 'land_name', 'housing_source', 'acquisition_date',
+            'house_year_began', 'investor', 'management_level', 'building_structure', 'total_floors', 'frontage',
+            'courtyard', 'whole_building', 'property_ownership_certificate',
+            'no_property_ownership_certificate_reason', 'unrelated_assets', 'assets_num', 'assets_tag_num',
+            'usage_status', 'building_use', 'ownership_status', 'floor_area', 'building_area',
+            'building_area_self_use', 'building_area_rent', 'building_area_idle', 'building_area_unusable',
+            'usable_area', 'usable_area_self_use', 'usable_area_rent', 'usable_area_idle', 'usable_area_unusable',
+            'community_assistant_name', 'community_assistant_unit', 'lng_jt', 'lat_jt', 'address',
+            'property_owner', 'checked', 'area_no', 'area_name', 'city_no', 'city_name', 'city_id', 'city',
+            'district_id', 'district', 'year_no', 'month_no', 'house_age'
+        ],
+        encoding='utf-8-sig'
+    )
+
+
+def data_import():
+    # 定义 PowerShell 脚本的路径
+    script_path = r"../../copy.ps1"
+    # 目标表和文件信息
+    table = "house.building_month"  # 数据库目标表名
+    # 表字段列名,用于指定导入数据的列顺序
+    columns = "year_month,first_unit,second_unit,third_unit,building_name,building_id,housing_acquisition_rate,site_name,site_id,land_name,housing_source,acquisition_date,house_year_began,investor,management_level,building_structure,total_floors,frontage,courtyard,whole_building,property_ownership_certificate,no_property_ownership_certificate_reason,unrelated_assets,assets_num,assets_tag_num,usage_status,building_use,ownership_status,floor_area,building_area,building_area_self_use,building_area_rent,building_area_idle,building_area_unusable,usable_area,usable_area_self_use,usable_area_rent,usable_area_idle,usable_area_unusable,community_assistant_name,community_assistant_unit,lng_jt,lat_jt,address,property_owner,checked,area_no,area_name,city_no,city_name,city_id,city,district_id,district,year_no,month_no,house_age"
+    # 构造执行 PowerShell 脚本的命令
+    command = f"powershell -File {script_path} -db_host {db_host} -db_port {db_port} -db_username {db_username} -db_password {db_password} -dbname {dbname} -table {table} -filename {output_path} -columns {columns}"
+    # 打印生成的命令,方便调试和日志记录
+    logger.info("command: {}", command)
+    # 使用 subprocess 模块运行 PowerShell 命令,并捕获输出
+    completed_process = subprocess.run(
+        command,  # 执行的命令
+        check=False,  # 如果命令执行失败,不抛出异常
+        text=True,  # 将输出作为字符串处理
+        capture_output=True,  # 捕获标准输出和标准错误
+    )
+    # 打印命令执行的结果,包括返回码、标准输出和标准错误
+    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
+                completed_process.stderr)
+    # 定义正则表达式,用于匹配标准输出中的 COPY 结果
+    p = re.compile(r"^(COPY) (\d+)$")
+    count = None  # 初始化计数变量
+    matcher = p.match(completed_process.stdout)  # 匹配标准输出中的 COPY 结果
+    if matcher:
+        count = int(matcher.group(2))  # 提取导入的数据行数
+    # 如果没有成功提取到导入数据的行数,抛出运行时异常
+    if count is None:
+        raise RuntimeError("导入数据失败")
+
+
+def upload_file():
+    remote_path = f'{remote_dir_path}{year_month}.xlsx'  # 定义远程主机的目标文件路径
+    # 使用paramiko.SSHClient创建一个SSH客户端对象,并通过with语句管理其上下文
+    with paramiko.SSHClient() as ssh:
+        # 设置自动添加主机密钥策略,避免因未知主机密钥导致连接失败
+        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        # 连接到远程主机,传入主机地址、端口、用户名和密码
+        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
+        # 执行远程命令,创建远程目录(如果不存在)
+        ssh.exec_command(f'mkdir -p {remote_dir_path}')
+        # 打开SFTP会话,用于文件传输,并通过with语句管理其上下文
+        with ssh.open_sftp() as sftp:
+            # 记录日志,提示即将上传的本地文件和远程目标路径
+            logger.info("upload {} to {}", input_path, remote_path)
+            # 使用SFTP的put方法将本地文件上传到远程主机
+            sftp.put(input_path, remote_path)
+            # 记录日志,提示文件已成功上传
+            logger.info("uploaded {}", input_path)
+
+
+def data_update():
+    """Run post-import SQL updates for the freshly loaded reporting month.
+
+    1. Copies site-level attributes from house.site_month onto building rows.
+    2. Backfills coordinates/images from the fixed 202312 baseline month.
+    3. Snapshots buildings with > 1000 m^2 idle area into
+       house.building_idle_strategy.
+    4. Publishes per-building KPI rows derived from that snapshot.
+
+    Note: ``year_month`` is interpolated into the SQL via f-strings; it is an
+    internally generated YYYYMM value, not user input.
+    """
+    with psycopg.connect(
+            conninfo=conn_info,
+    ) as conn:
+        with conn.cursor() as curs:
+            # Propagate site attributes from house.site_month onto this month's rows.
+            sql = f"""
+            update
+                house.building_month a
+            set
+                site_num = b.site_num,
+                city_level = b.city_level,
+                city_region = b.city_region,
+                area_sector = b.area_sector,
+                has_land = b.has_land
+            from
+                house.site_month b
+            where
+                a.site_id = b.site_id
+                and a.year_month = b.year_month
+                and a.year_month = {year_month}
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            logger.info(f"update {curs.rowcount}")
+
+            # Backfill coordinates and building images from the fixed 202312
+            # baseline month (the last month these fields were captured).
+            sql = f"""
+            with
+            t101 as (
+            select
+                *
+            from
+                house.building_month
+            where
+                year_month = 202312
+            )
+            update
+                house.building_month a
+            set
+                lng_wgs84 = b.lng_wgs84,
+                lat_wgs84 = b.lat_wgs84,
+                lng_bd09 = b.lng_bd09,
+                lat_bd09 = b.lat_bd09,
+                building_img = b.building_img
+            from
+                t101 b
+            where
+                a.year_month = {year_month}
+                and a.building_id = b.building_id
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            logger.info(f"update {curs.rowcount}")
+
+            # Snapshot buildings whose idle building area exceeds 1000 m^2
+            # into the idle-strategy table.
+            sql = f"""
+            insert
+                into
+                house.building_idle_strategy
+            (
+                year_month,
+                building_id,
+                first_unit,
+                second_unit,
+                third_unit,
+                site_num,
+                site_name,
+                address,
+                city_level,
+                city_region,
+                area_sector,
+                has_land,
+                site_id,
+                building_name,
+                housing_acquisition_rate,
+                housing_source,
+                acquisition_date,
+                house_year_began,
+                investor,
+                management_level,
+                building_structure,
+                total_floors,
+                assets_num,
+                assets_tag_num,
+                usage_status,
+                building_use,
+                ownership_status,
+                floor_area,
+                building_area,
+                building_area_self_use,
+                building_area_rent,
+                building_area_idle,
+                building_area_unusable,
+                usable_area,
+                usable_area_self_use,
+                usable_area_rent,
+                usable_area_idle,
+                usable_area_unusable,
+                city,
+                district,
+                lng_wgs84,
+                lat_wgs84,
+                lng_bd09,
+                lat_bd09,
+                building_img,
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                year_no,
+                month_no,
+                house_age,
+                land_name,
+                frontage,
+                courtyard,
+                whole_building,
+                property_ownership_certificate,
+                no_property_ownership_certificate_reason,
+                unrelated_assets,
+                community_assistant_name,
+                community_assistant_unit,
+                lng_jt,
+                lat_jt,
+                property_owner,
+                checked,
+                city_id,
+                district_id
+            )
+            select
+                year_month,
+                building_id,
+                first_unit,
+                second_unit,
+                third_unit,
+                site_num,
+                site_name,
+                address,
+                city_level,
+                city_region,
+                area_sector,
+                has_land,
+                site_id,
+                building_name,
+                housing_acquisition_rate,
+                housing_source,
+                acquisition_date,
+                house_year_began,
+                investor,
+                management_level,
+                building_structure,
+                total_floors,
+                assets_num,
+                assets_tag_num,
+                usage_status,
+                building_use,
+                ownership_status,
+                floor_area,
+                building_area,
+                building_area_self_use,
+                building_area_rent,
+                building_area_idle,
+                building_area_unusable,
+                usable_area,
+                usable_area_self_use,
+                usable_area_rent,
+                usable_area_idle,
+                usable_area_unusable,
+                city,
+                district,
+                lng_wgs84,
+                lat_wgs84,
+                lng_bd09,
+                lat_bd09,
+                building_img,
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                year_no,
+                month_no,
+                house_age,
+                land_name,
+                frontage,
+                courtyard,
+                whole_building,
+                property_ownership_certificate,
+                no_property_ownership_certificate_reason,
+                unrelated_assets,
+                community_assistant_name,
+                community_assistant_unit,
+                lng_jt,
+                lat_jt,
+                property_owner,
+                checked,
+                city_id,
+                district_id
+            from
+                house.building_month
+            where
+                building_area_idle > 1000
+                and year_month = {year_month}
+            order by
+                building_area_idle desc
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            logger.info(f"update {curs.rowcount}")
+
+            # Publish per-building KPI rows (idle area, name, id, total area)
+            # derived from the snapshot, ordered by idle area.
+            sql = f"""
+            with
+            t101 as (
+            select
+                *,
+                row_number() over (
+                order by building_area_idle desc) as sort
+            from
+                house.building_idle_strategy
+            where
+                year_month = {year_month}
+            ),
+            t201 as (
+            select
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                'kpi_301320_155_01' as kpi_code,
+                '闲置建筑面积' as kpi_name,
+                round(building_area_idle, 2)::varchar as kpi_value,
+                '1' as kpi_type,
+                building_id as jk_object_no,
+                building_name as jk_object,
+                sort
+            from
+                t101
+            ),
+            t202 as (
+            select
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                'kpi_301320_155_02' as kpi_code,
+                '房产名称' as kpi_name,
+                building_name as kpi_value,
+                '0' as kpi_type,
+                building_id as jk_object_no,
+                building_name as jk_object,
+                sort
+            from
+                t101
+            ),
+            t203 as (
+            select
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                'kpi_301320_155_03' as kpi_code,
+                '房产编号' as kpi_name,
+                building_id as kpi_value,
+                '0' as kpi_type,
+                building_id as jk_object_no,
+                building_name as jk_object,
+                sort
+            from
+                t101
+            ),
+            t204 as (
+            select
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                'kpi_301320_155_04' as kpi_code,
+                '房产总建筑面积' as kpi_name,
+                round(building_area, 2)::varchar as kpi_value,
+                '0' as kpi_type,
+                building_id as jk_object_no,
+                building_name as jk_object,
+                sort
+            from
+                t101
+            ),
+            t301 as (
+            select
+                *
+            from
+                t201
+            union all
+            select
+                *
+            from
+                t202
+            union all
+            select
+                *
+            from
+                t203
+            union all
+            select
+                *
+            from
+                t204
+            )
+            insert
+                into
+                publish.house_building_idle_strategy
+            (
+                acct_date,
+                dept_code,
+                dept_name,
+                strategy_code,
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                sale_no,
+                sale_name,
+                jk_object_no,
+                jk_object,
+                kpi_code,
+                kpi_name,
+                kpi_value,
+                kpi_type,
+                sort
+            )
+            select
+                {year_month} as acct_date,
+                '301320' as dept_code,
+                '河北省分公司纵横运营中心' as dept_name,
+                '301320_155' as strategy_code,
+                area_no,
+                area_name,
+                city_no,
+                city_name,
+                '' as sale_no,
+                '' as sale_name,
+                jk_object_no,
+                jk_object,
+                kpi_code,
+                kpi_name,
+                kpi_value,
+                kpi_type,
+                sort
+            from
+                t301
+            order by
+                sort,
+                kpi_code
+            """
+            logger.info(f"sql: {sql}")
+            curs.execute(sql)
+            logger.info(f"update {curs.rowcount}")
+
+
+data_process()
+data_import()
+upload_file()
+data_update()

+ 574 - 0
house/house-fang-jian/house_fang_jian.py

@@ -0,0 +1,574 @@
"""Real-estate room data processing (house.room_month pipeline).
"""

import re
from datetime import datetime
from dateutil.relativedelta import relativedelta
from loguru import logger
import pandas as pd
import psycopg
import subprocess
import paramiko

# Configure the logger to also write to file a.log
logger.add(sink='a.log')
# NOTE(review): SSH and DB credentials are hardcoded below — move to env vars
# or a secrets store before this leaves an internal repo.
ssh_hostname = '172.16.107.4'  # remote archive host
ssh_port = 22  # SSH port
ssh_username = 'app'  # SSH login user
ssh_password = '(l4w0ST_'  # SSH login password
# Remote directory where the source Excel is archived
remote_dir_path = '/data/history/house/room/'
# Database connection settings
db_host = "172.16.107.5"  # database host
db_port = 5432         # database port
db_username = "finance"  # database user
db_password = "Finance@unicom23"  # database password
dbname = "financialdb"       # database name
conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
# Accounting month: first day of the previous month, formatted YYYYMM
today = datetime.today()
start_date = today - relativedelta(months=1, day=1)
year_month = start_date.strftime('%Y%m')
# Input Excel path
input_path = 'data.xlsx'
# Output CSV path
output_path = 'output.csv'
+
+
def data_process():
    """Enrich the room Excel export with org/area codes and write it as CSV.

    Loads organization and administrative-area reference data from PostgreSQL,
    then derives second/third-level org codes and city/district ids for every
    row of the Excel file, and writes the result to ``output_path`` with the
    English column names expected by ``house.room_month``.
    """
    org_map = {}  # org id -> organization record
    third_org_list_map = {}  # second-level org id -> list of its third-level orgs
    area_map = {}  # area_id -> area record
    districts_list_map = {}  # city area_id -> list of its district records

    # Connect to PostgreSQL and load the reference data
    with psycopg.connect(
            conninfo=conn_info,
            row_factory=psycopg.rows.dict_row  # return rows as dicts
    ) as conn:
        with conn.cursor() as curs:
            # All second-level organizations (grade = 1)
            sql = """
            select * from common.organization where grade = 1
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            second_orgs = curs.fetchall()
            # Seed third_org_list_map: key = second-level org id, value = empty list
            for x in second_orgs:
                third_org_list_map[x['id']] = []

            # All organizations
            sql = """
            select * from common.organization
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            orgs = curs.fetchall()
            # Build org_map and attach each third-level org to its parent's list
            for x in orgs:
                if x['parent_id'] in third_org_list_map:
                    third_org_list_map[x['parent_id']].append(x)
                org_map[x['id']] = x

            # All first-level administrative areas (cities), ordered by area_id
            sql = """
            select * from common.area where area_grade = 1 order by area_id
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            cities = curs.fetchall()
            # Seed districts_list_map: key = city area_id, value = empty list
            for x in cities:
                districts_list_map[x['area_id']] = []

            # All areas
            sql = """
            select * from common.area
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            areas = curs.fetchall()
            # Build area_map and attach each district to its parent city's list
            for x in areas:
                if x['parent_id'] in districts_list_map:
                    districts_list_map[x['parent_id']].append(x)
                area_map[x['area_id']] = x

    # Read the Excel workbook
    df = pd.read_excel(io=input_path)

    # Strip whitespace from string cells (room-name column excluded on purpose).
    # NOTE(review): ('房间名称') is a plain string, not a tuple, so `x not in`
    # is a substring test — any column whose name is a substring of 房间名称
    # would also be excluded. Likely intended ('房间名称',); confirm.
    columns_to_clean = list(filter(lambda x: x not in ('房间名称'), df.columns))
    df[columns_to_clean] = df[columns_to_clean].map(
        lambda x: re.sub(r'\s+', '', x) if type(x) is str else x
    )

    # Map a row's owning units to a second-level org code
    def get_area_no(x):
        second_unit = x['资产所属单位(二级)']
        third_unit = x['资产所属单位(三级)']
        # Fixed pseudo code for the long-distance transmission bureau
        if '长途通信传输局' == second_unit:
            return '-11'
        # Xiong'an counties administered under Baoding map to org 782
        if '保定' in second_unit and ('雄县' in third_unit or '容城' in third_unit or '安新' in third_unit):
            return '782'
        for second_org in second_orgs:
            area_name = second_org['name']
            area_no = second_org['id']
            if area_name in second_unit:
                return area_no
        return '-12'  # fallback bucket for unmatched units

    # Derive the second-level org code column
    df['二级组织机构编码'] = df.apply(get_area_no, axis=1)

    # Resolve the second-level org name from its code
    def get_area_name(x):
        area_no = x['二级组织机构编码']
        second_org = org_map[area_no]
        area_name = second_org['name']
        return area_name

    # Derive the second-level org name column
    df['二级组织机构名称'] = df.apply(get_area_name, axis=1)

    # Map a row to a third-level org code, with many keyword-based special cases
    def get_city_no(x):
        third_unit = x['资产所属单位(三级)']
        area_name = x['二级组织机构名称']
        area_no = x['二级组织机构编码']
        if area_name == '石家庄':
            if '矿区' in third_unit:
                return 'D0130185'
            if '井陉' in third_unit:
                return 'D0130121'
        if area_name == '秦皇岛':
            # NOTE(review): same code as Shijiazhuang 矿区 above — looks like a
            # copy-paste; verify 北戴河新区's real org code.
            if '北戴河新区' in third_unit:
                return 'D0130185'
            if '北戴河' in third_unit:
                return 'D0130304'
        if area_name == '唐山':
            if '滦县' in third_unit:
                return 'D0130223'
            if '高新技术开发区' in third_unit:
                return 'D0130205'
        if area_name == '邢台':
            if '内丘' in third_unit:
                return 'D0130523'
            if '任泽' in third_unit:
                return 'D0130526'
        if area_name == '邯郸':
            if '峰峰' in third_unit:
                return 'D0130406'
        if area_name == '省机动局':
            # Provincial mobile bureau: one fixed code per city keyword
            if '沧州' in third_unit:
                return 'HECS180'
            if '唐山' in third_unit:
                return 'HECS181'
            if '秦皇岛' in third_unit:
                return 'HECS182'
            if '廊坊' in third_unit:
                return 'HECS183'
            if '张家口' in third_unit:
                return 'HECS184'
            if '邢台' in third_unit:
                return 'HECS185'
            if '邯郸' in third_unit:
                return 'HECS186'
            if '保定' in third_unit:
                return 'HECS187'
            if '石家庄' in third_unit:
                return 'HECS188'
            if '承德' in third_unit:
                return 'HECS189'
            if '衡水' in third_unit:
                return 'HECS720'
            if '雄安' in third_unit:
                return 'HECS728'
            return 'HECS018'
        if '雄安' == area_name:
            # Drop the "Xiong'an New Area" prefix so district names match
            third_unit = third_unit.replace('雄安新区', '')
        # Generic path: match a third-level org name inside the unit name
        third_org_list = third_org_list_map[area_no]
        for third_org in third_org_list:
            city_name = third_org['name']
            if city_name in third_unit:
                return third_org['id']
        # Per-city default codes when nothing matched
        if '沧州' == area_name:
            return 'D0130911'
        if '唐山' == area_name:
            return 'D0130202'
        if '秦皇岛' == area_name:
            return 'D0130302'
        if '廊坊' == area_name:
            return 'D0131000'
        if '张家口' == area_name:
            return 'D0130701'
        if '邢台' == area_name:
            return 'D0130502'
        if '邯郸' == area_name:
            return 'D0130402'
        if '保定' == area_name:
            return 'D0130601'
        if '石家庄' == area_name:
            return 'D0130186'
        if '承德' == area_name:
            return 'D0130801'
        if '衡水' == area_name:
            return 'D0133001'
        if '雄安' == area_name:
            return 'D0130830'
        return 'HE001'  # province-wide fallback

    # Derive the third-level org code column
    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)

    # Resolve the third-level org name from its code
    def get_city_name(x):
        city_no = x['三级组织机构编码']
        third_org = org_map[city_no]
        city_name = third_org['name']
        return city_name

    # Derive the third-level org name column
    df['三级组织机构名称'] = df.apply(get_city_name, axis=1)

    # Map address + owning units to an administrative city id
    def get_city_id(x):
        address = x['标准地址']
        second_unit = x['资产所属单位(二级)']
        third_unit = x['资产所属单位(三级)']
        # Xiong'an (incl. its three counties addressed under Baoding)
        if '雄安' in address or ('保定' in address and ('雄县' in address or '容城' in address or '安新' in address)):
            return '133100'
        for city in cities:
            area_name = city['short_name']
            area_id = city['area_id']
            if area_name in second_unit or area_name in third_unit or area_name in address:
                return area_id
        return ''

    # Derive the city id column
    df['city_id'] = df.apply(get_city_id, axis=1)

    # Resolve the city name from its id
    def get_city(x):
        city_id = x['city_id']
        area = area_map.get(city_id)
        if pd.notna(area):
            city = area['area_name']
            return city
        return ''

    # Derive the city name column
    df['city'] = df.apply(get_city, axis=1)

    # Map address + city to a district id
    def get_district_id(x):
        address = x['标准地址']
        city = x['city']
        city_id = x['city_id']
        if pd.isna(city) or pd.isna(address):
            return ''
        # Keyword overrides for renamed/merged districts
        if city == '石家庄':
            if '矿区' in address:
                return '130107'
            if '井陉' in address:
                return '130121'
        if city == '唐山':
            if '滦县' in address:
                return '130284'
        if city == '邢台':
            if '内邱' in address:
                return '130523'
            if '任县' in address:
                return '130505'
        if city == '雄安':
            address = address.replace('雄安新区', '')
        districts = districts_list_map.get(city_id)
        if not districts:
            return ''
        for district in districts:
            district_name = district['short_name']
            if district_name in address:
                return district['area_id']
        return ''

    # Derive the district id column
    df['district_id'] = df.apply(get_district_id, axis=1)

    # Resolve the district name from its id
    def get_district(x):
        district_id = x['district_id']
        area = area_map.get(district_id)
        if pd.notna(area):
            district = area['area_name']
            return district
        return ''

    # Derive the district name column
    df['district'] = df.apply(get_district, axis=1)

    # Coerce seat counts to int; anything unparseable becomes empty string
    def get_int(x):
        try:
            return int(x)
        except Exception:
            return ""

    df['工位总数'] = df['工位总数'].apply(get_int)
    # Prepend the accounting month column
    df.insert(0, '年月', year_month)

    # Log a structural summary of the frame
    print(df.info())

    # Persist with the English column names matching house.room_month
    df.to_csv(
        path_or_buf=output_path,
        index=False,
        header=[
            'year_month', 'first_unit', 'second_unit', 'third_unit', 'building_name', 'address', 'floor',
            'floor_building_area', 'floor_usable_area', 'room_name', 'room_status', 'rent_type',
            'first_room_type', 'second_room_type', 'seat_num', 'frontage', 'building_area',
            'building_area_self_use', 'building_area_idle', 'building_area_rent', 'building_area_unusable',
            'usable_area', 'usable_area_self_use', 'usable_area_idle', 'usable_area_rent', 'usable_area_unusable',
            'idle_start_date', 'unusable_reason', 'floor_height', 'load_bearing', 'area_no', 'area_name',
            'city_no', 'city_name', 'city_id', 'city', 'district_id', 'district'
        ],
        encoding='utf-8-sig'
    )
+
+
def data_import():
    """Bulk-load the generated CSV into house.room_month via the copy.ps1 helper.

    Runs the PowerShell wrapper with subprocess, then parses the psql
    ``COPY <n>`` line from its stdout to confirm how many rows were imported.

    Raises:
        RuntimeError: if no ``COPY <n>`` line is found in the helper's output.
    """
    # Path of the PowerShell wrapper around psql \copy
    script_path = r"../../copy.ps1"
    # Target table; the column order must match the CSV written by data_process()
    table = "house.room_month"
    columns = "year_month,first_unit,second_unit,third_unit,building_name,address,floor,floor_building_area,floor_usable_area,room_name,room_status,rent_type,first_room_type,second_room_type,seat_num,frontage,building_area,building_area_self_use,building_area_idle,building_area_rent,building_area_unusable,usable_area,usable_area_self_use,usable_area_idle,usable_area_rent,usable_area_unusable,idle_start_date,unusable_reason,floor_height,load_bearing,area_no,area_name,city_no,city_name,city_id,city,district_id,district"
    # Assemble the PowerShell command line
    command = f"powershell -File {script_path} -db_host {db_host} -db_port {db_port} -db_username {db_username} -db_password {db_password} -dbname {dbname} -table {table} -filename {output_path} -columns {columns}"
    logger.info("command: {}", command)
    completed_process = subprocess.run(
        command,
        check=False,  # inspect the output ourselves instead of raising on non-zero exit
        text=True,
        capture_output=True,
    )
    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
                completed_process.stderr)
    # psql prints "COPY <rowcount>" on success. Search every line of stdout:
    # the original re.match anchored at the very start of stdout and missed
    # the count whenever the helper printed anything before the COPY line.
    matcher = re.search(r"^COPY (\d+)$", completed_process.stdout, re.MULTILINE)
    count = int(matcher.group(1)) if matcher else None
    if count is None:
        raise RuntimeError("导入数据失败")
+
+
def upload_file():
    """Archive the source Excel to the history server over SFTP as <year_month>.xlsx."""
    remote_path = f'{remote_dir_path}{year_month}.xlsx'  # target path on the remote host
    with paramiko.SSHClient() as ssh:
        # Accept unknown host keys so first-time connections don't fail
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
        # Ensure the target directory exists. exec_command is asynchronous, so
        # block on the exit status before uploading — the original raced the
        # mkdir against the SFTP put and could fail on a fresh month/directory.
        _, mkdir_stdout, _ = ssh.exec_command(f'mkdir -p {remote_dir_path}')
        mkdir_stdout.channel.recv_exit_status()  # wait until mkdir completes
        # Open an SFTP session for the transfer
        with ssh.open_sftp() as sftp:
            logger.info("upload {} to {}", input_path, remote_path)
            sftp.put(input_path, remote_path)
            logger.info("uploaded {}", input_path)
+
+
def data_update():
    """Insert per-capita office-area stats for the month into house.building_office_area_stat."""
    with psycopg.connect(
            conninfo=conn_info,
    ) as conn:
        with conn.cursor() as curs:
            # Per-capita office area. CTE map:
            #   t100-t103: special orgs '-11'/'-12' (province level, no city split)
            #   t200-t203: regular second/third-level orgs, joined with office
            #              area (t201) and headcount (t202, latest month)
            #   t301: union of both branches, then inserted with area_avg =
            #         self-use office area / headcount (null when headcount is 0)
            sql = f"""
            with
            t100 as (
            select
                id as area_no,
                name as area_name,
                order_num as area_order
            from
                common.organization
            where
                id in ('-11', '-12')
            ),
            t101 as (
            select
                area_no,
                sum(building_area_self_use) as building_area_self_use_sum
            from
                house.room_month
            where
                second_room_type = '办公用房'
                and year_month = {year_month}
                and area_no in ('-11', '-12')
            group by
                area_no
            ),
            t102 as (
            select
                *
            from
                house.staff_second_unit
            where
                year_month = (
                select
                    max(year_month)
                from
                    house.staff_second_unit)
                and area_no in ('-11', '-12') 
            ),
            t103 as (
            select
                t100.area_no,
                t100.area_name,
                '' as city_no,
                '' as city_name,
                t101.building_area_self_use_sum,
                t102.total,
                t100.area_order,
                0 as city_order
            from
                t100
            left join t101 on
                t100.area_no = t101.area_no
            left join t102 on
                t100.area_no = t102.area_no
            ),
            t200 as (
            select
                b.id as area_no,
                b.name as area_name,
                a.id as city_no,
                a.name as city_name,
                b.order_num as area_order,
                a.order_num as city_order
            from
                common.organization a
            left join common.organization b on
                a.parent_id = b.id
            where
                a.unhide = 1
                and a.grade = 2
                and a.parent_id not in ('-11', '-12')
            order by
                b.id,
                a.id
            ),
            t201 as (
            select
                area_no,
                city_no,
                sum(building_area_self_use) as building_area_self_use_sum
            from
                house.room_month
            where
                second_room_type = '办公用房'
                and area_no not in ('-11', '-12')
                    and year_month = {year_month}
                group by
                    area_no,
                    city_no
            ),
            t202 as (
            select
                *
            from
                house.staff_third_unit
            where
                year_month = (
                select
                    max(year_month)
                from
                    house.staff_third_unit)
                and area_no not in ('-11', '-12')
            ),
            t203 as (
            select
                t200.area_no,
                t200.area_name,
                t200.city_no,
                t200.city_name,
                t201.building_area_self_use_sum,
                t202.total,
                t200.area_order,
                t200.city_order
            from
                t200
            left join t201 on
                t200.area_no = t201.area_no
                and t200.city_no = t201.city_no
            left join t202 on
                t200.area_no = t202.area_no
                and t200.city_no = t202.city_no
            ),
            t301 as (
            select
                *
            from
                t103
            union all
            select
                *
            from
                t203
            )
            insert
                into
                house.building_office_area_stat
            (
            year_month,
                area_no,
                area_name,
                city_no,
                city_name,
                building_area_self_use_sum,
                total,
                area_avg,
                area_order,
                city_order
            )
            select
                {year_month} as year_month,
                area_no,
                area_name,
                city_no,
                city_name,
                coalesce(building_area_self_use_sum, 0) as building_area_self_use_sum,
                coalesce(total, 0) as total,
                case
                    when total = 0 then null
                    else round(coalesce(building_area_self_use_sum, 0) / total, 2)
                end as area_avg,
                area_order,
                city_order
            from
                t301
            order by
                area_order,
                city_order
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            logger.info(f"update {curs.rowcount}")
+
+
# Run the pipeline only when executed as a script; importing this module
# must not trigger DB writes, file uploads, or Excel processing.
if __name__ == "__main__":
    data_process()
    data_import()
    upload_file()
    data_update()

+ 430 - 0
house/house-land/house_land.py

@@ -0,0 +1,430 @@
"""Real-estate land data processing (house.land_month pipeline).
"""

# Imports
import re  # regular expressions
from datetime import datetime  # date/time handling
from dateutil.relativedelta import relativedelta  # relative date arithmetic
from loguru import logger  # logging
import pandas as pd  # dataframe processing
import psycopg  # PostgreSQL driver
import subprocess
import paramiko

# Configure the logger to also write to file a.log
logger.add(sink='a.log')
# NOTE(review): SSH and DB credentials are hardcoded below — move to env vars
# or a secrets store before this leaves an internal repo.
ssh_hostname = '172.16.107.4'  # remote archive host
ssh_port = 22  # SSH port
ssh_username = 'app'  # SSH login user
ssh_password = '(l4w0ST_'  # SSH login password
# Remote directory where the source Excel is archived
remote_dir_path = '/data/history/house/land/'
# Database connection settings
db_host = "172.16.107.5"  # database host
db_port = 5432         # database port
db_username = "finance"  # database user
db_password = "Finance@unicom23"  # database password
dbname = "financialdb"       # database name
conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
# Accounting month: first day of the previous month, formatted YYYYMM
today = datetime.today()
start_date = today - relativedelta(months=1, day=1)
year_month = start_date.strftime('%Y%m')
# Input Excel path
input_path = 'data.xlsx'
# Output CSV path
output_path = 'output.csv'
+
+
def data_process():
    """Enrich the land Excel export with org/area codes and write it as CSV.

    Loads organization and administrative-area reference data from PostgreSQL,
    deduplicates the Excel rows by land id, derives second/third-level org
    codes and city/district ids, and writes the result to ``output_path`` with
    the English column names expected by ``house.land_month``.
    """
    org_map = {}  # org id -> organization record
    third_org_list_map = {}  # second-level org id -> list of its third-level orgs
    area_map = {}  # area_id -> area record
    districts_list_map = {}  # city area_id -> list of its district records

    # Connect to PostgreSQL and load the reference data
    with psycopg.connect(
            conninfo=conn_info,
            row_factory=psycopg.rows.dict_row  # return rows as dicts
    ) as conn:
        with conn.cursor() as curs:
            # All second-level organizations (grade = 1)
            sql = """
            select * from common.organization where grade = 1
            """
            logger.info(f"sql: {sql}")  # log the SQL statement
            curs.execute(sql)
            second_orgs = curs.fetchall()  # all second-level organizations
            for x in second_orgs:
                third_org_list_map[x['id']] = []  # seed child list per second-level org

            # All organizations
            sql = """
            select * from common.organization
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            orgs = curs.fetchall()
            for x in orgs:
                if x['parent_id'] in third_org_list_map:
                    # attach third-level orgs to their second-level parent
                    third_org_list_map[x['parent_id']].append(x)
                org_map[x['id']] = x  # index every org by id

            # All first-level areas (cities), ordered by area_id
            sql = """
            select * from common.area where area_grade = 1 order by area_id
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            cities = curs.fetchall()  # all first-level areas (cities)
            for x in cities:
                districts_list_map[x['area_id']] = []  # seed district list per city

            # All areas
            sql = """
            select * from common.area
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            areas = curs.fetchall()
            for x in areas:
                if x['parent_id'] in districts_list_map:
                    # attach districts to their parent city
                    districts_list_map[x['parent_id']].append(x)
                area_map[x['area_id']] = x  # index every area by id

    # Read the Excel workbook
    df = pd.read_excel(io=input_path)

    # Strip all whitespace from every string cell
    df = df.map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)

    # Deduplicate on land id, keeping the last occurrence
    df.drop_duplicates(subset=['土地ID'], keep='last', inplace=True)


    # Map a row's owning units to a second-level org code
    def get_area_no(x):
        second_unit = x['资产所属单位(二级)']
        third_unit = x['资产所属单位(三级)']
        # Fixed pseudo code for the long-distance transmission bureau
        if '长途通信传输局' == second_unit:
            return '-11'
        # Xiong'an counties administered under Baoding map to org 782
        if '保定' in second_unit and ('雄县' in third_unit or '容城' in third_unit or '安新' in third_unit):
            return '782'
        for second_org in second_orgs:
            area_name = second_org['name']
            area_no = second_org['id']
            if area_name in second_unit:
                return area_no
        return '-12'  # fallback bucket for unmatched units


    # Derive the second-level org code column
    df['二级组织机构编码'] = df.apply(get_area_no, axis=1)


    # Resolve the second-level org name from its code
    def get_area_name(x):
        area_no = x['二级组织机构编码']
        second_org = org_map[area_no]
        area_name = second_org['name']
        return area_name


    # Derive the second-level org name column
    df['二级组织机构名称'] = df.apply(get_area_name, axis=1)


    # Map a row to a third-level org code, with many keyword-based special cases
    def get_city_no(x):
        third_unit = x['资产所属单位(三级)']
        area_name = x['二级组织机构名称']
        area_no = x['二级组织机构编码']
        if area_name == '石家庄':
            if '矿区' in third_unit:
                return 'D0130185'
            if '井陉' in third_unit:
                return 'D0130121'
        if area_name == '秦皇岛':
            # NOTE(review): same code as Shijiazhuang 矿区 above — looks like a
            # copy-paste; verify 北戴河新区's real org code.
            if '北戴河新区' in third_unit:
                return 'D0130185'
            if '北戴河' in third_unit:
                return 'D0130304'
        if area_name == '唐山':
            if '滦县' in third_unit:
                return 'D0130223'
            if '高新技术开发区' in third_unit:
                return 'D0130205'
        if area_name == '邢台':
            if '内丘' in third_unit:
                return 'D0130523'
            if '任泽' in third_unit:
                return 'D0130526'
        if area_name == '邯郸':
            if '峰峰' in third_unit:
                return 'D0130406'
        if area_name == '省机动局':
            # Provincial mobile bureau: one fixed code per city keyword
            if '沧州' in third_unit:
                return 'HECS180'
            if '唐山' in third_unit:
                return 'HECS181'
            if '秦皇岛' in third_unit:
                return 'HECS182'
            if '廊坊' in third_unit:
                return 'HECS183'
            if '张家口' in third_unit:
                return 'HECS184'
            if '邢台' in third_unit:
                return 'HECS185'
            if '邯郸' in third_unit:
                return 'HECS186'
            if '保定' in third_unit:
                return 'HECS187'
            if '石家庄' in third_unit:
                return 'HECS188'
            if '承德' in third_unit:
                return 'HECS189'
            if '衡水' in third_unit:
                return 'HECS720'
            if '雄安' in third_unit:
                return 'HECS728'
            return 'HECS018'
        if '雄安' == area_name:
            # Drop the "Xiong'an New Area" prefix so district names match
            third_unit = third_unit.replace('雄安新区', '')
        # Generic path: match a third-level org name inside the unit name
        third_org_list = third_org_list_map[area_no]
        for third_org in third_org_list:
            city_name = third_org['name']
            if city_name in third_unit:
                return third_org['id']
        # Per-city default codes when nothing matched
        if '沧州' == area_name:
            return 'D0130911'
        if '唐山' == area_name:
            return 'D0130202'
        if '秦皇岛' == area_name:
            return 'D0130302'
        if '廊坊' == area_name:
            return 'D0131000'
        if '张家口' == area_name:
            return 'D0130701'
        if '邢台' == area_name:
            return 'D0130502'
        if '邯郸' == area_name:
            return 'D0130402'
        if '保定' == area_name:
            return 'D0130601'
        if '石家庄' == area_name:
            return 'D0130186'
        if '承德' == area_name:
            return 'D0130801'
        if '衡水' == area_name:
            return 'D0133001'
        if '雄安' == area_name:
            return 'D0130830'
        return 'HE001'  # province-wide fallback


    # Derive the third-level org code column
    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)


    # Resolve the third-level org name from its code
    def get_city_name(x):
        city_no = x['三级组织机构编码']
        third_org = org_map[city_no]
        city_name = third_org['name']
        return city_name


    # Derive the third-level org name column
    df['三级组织机构名称'] = df.apply(get_city_name, axis=1)


    # Map address + owning units to an administrative city id
    def get_city_id(x):
        address = x['标准地址']
        second_unit = x['资产所属单位(二级)']
        third_unit = x['资产所属单位(三级)']
        # Xiong'an (incl. its three counties addressed under Baoding)
        if '雄安' in address or ('保定' in address and ('雄县' in address or '容城' in address or '安新' in address)):
            return '133100'
        for city in cities:
            area_name = city['short_name']
            area_id = city['area_id']
            if area_name in second_unit:
                return area_id
            if area_name in third_unit:
                return area_id
            if area_name in address:
                return area_id
        return ''


    # Derive the city id column
    df['city_id'] = df.apply(get_city_id, axis=1)


    # Resolve the city name from its id
    def get_city(x):
        city_id = x['city_id']
        area = area_map.get(city_id)
        if pd.notna(area):
            city = area['area_name']
            return city
        return ''


    # Derive the city name column
    df['city'] = df.apply(get_city, axis=1)


    # Map address + city to a district id
    def get_district_id(x):
        address = x['标准地址']
        city = x['city']
        city_id = x['city_id']
        if pd.isna(city) or pd.isna(address):
            return ''
        # Keyword overrides for renamed/merged districts
        if city == '石家庄':
            if '矿区' in address:
                return '130107'
            if '井陉' in address:
                return '130121'
        if city == '唐山':
            if '滦县' in address:
                return '130284'
        if city == '邢台':
            if '内邱' in address:
                return '130523'
            if '任县' in address:
                return '130505'
        if city == '雄安':
            address = address.replace('雄安新区', '')
        districts = districts_list_map.get(city_id)
        if not districts:
            return ''
        for district in districts:
            district_name = district['short_name']
            if district_name in address:
                return district['area_id']
        return ''


    # Derive the district id column
    df['district_id'] = df.apply(get_district_id, axis=1)


    # Resolve the district name from its id
    def get_district(x):
        district_id = x['district_id']
        area = area_map.get(district_id)
        if pd.notna(area):
            district = area['area_name']
            return district
        return ''


    # Derive the district name column
    df['district'] = df.apply(get_district, axis=1)
    # Prepend the accounting month column
    df.insert(0, '年月', year_month)

    # Log a structural summary of the frame
    print(df.info())

    # Persist with the English column names matching house.land_month
    df.to_csv(
        path_or_buf=output_path,
        index=False,
        header=[
            'year_month', 'first_unit', 'second_unit', 'third_unit', 'land_name', 'land_id', 'land_ownership',
            'use_right_type', 'land_use', 'acquisition_date', 'idle_start_date', 'site_name', 'site_id',
            'address', 'investor', 'management_level', 'ownership_status', 'usage_status', 'total_land_area',
            'land_area_self_use', 'land_area_idle', 'land_area_rent', 'land_area_unusable', 'has_land_deed',
            'no_land_deed_reason', 'land_preservation_risk', 'open_space', 'courtyard', 'unrelated_assets',
            'assets_num', 'assets_tag_num', 'responsible_department', 'person_in_charge', 'lng_jt', 'lat_jt',
            'property_owner', 'special_specification', 'area_no', 'area_name', 'city_no', 'city_name', 'city_id',
            'city', 'district_id', 'district'
        ],
        encoding='utf-8-sig'  # BOM so Excel renders Chinese text correctly
    )
+
+
def data_import():
    """Bulk-load the generated CSV into house.land_month via the copy.ps1 helper.

    Runs the PowerShell wrapper with subprocess, then parses the psql
    ``COPY <n>`` line from its stdout to confirm how many rows were imported.

    Raises:
        RuntimeError: if no ``COPY <n>`` line is found in the helper's output.
    """
    # Path of the PowerShell wrapper around psql \copy
    script_path = r"../../copy.ps1"
    # Target table; the column order must match the CSV written by data_process()
    table = "house.land_month"
    columns = "year_month,first_unit,second_unit,third_unit,land_name,land_id,land_ownership,use_right_type,land_use,acquisition_date,idle_start_date,site_name,site_id,address,investor,management_level,ownership_status,usage_status,total_land_area,land_area_self_use,land_area_idle,land_area_rent,land_area_unusable,has_land_deed,no_land_deed_reason,land_preservation_risk,open_space,courtyard,unrelated_assets,assets_num,assets_tag_num,responsible_department,person_in_charge,lng_jt,lat_jt,property_owner,special_specification,area_no,area_name,city_no,city_name,city_id,city,district_id,district"
    # Assemble the PowerShell command line
    command = f"powershell -File {script_path} -db_host {db_host} -db_port {db_port} -db_username {db_username} -db_password {db_password} -dbname {dbname} -table {table} -filename {output_path} -columns {columns}"
    logger.info("command: {}", command)
    completed_process = subprocess.run(
        command,
        check=False,  # inspect the output ourselves instead of raising on non-zero exit
        text=True,
        capture_output=True,
    )
    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
                completed_process.stderr)
    # psql prints "COPY <rowcount>" on success. Search every line of stdout:
    # the original re.match anchored at the very start of stdout and missed
    # the count whenever the helper printed anything before the COPY line.
    matcher = re.search(r"^COPY (\d+)$", completed_process.stdout, re.MULTILINE)
    count = int(matcher.group(1)) if matcher else None
    if count is None:
        raise RuntimeError("导入数据失败")
+
+
def upload_file():
    """Archive the source Excel to the history server over SFTP as <year_month>.xlsx."""
    remote_path = f'{remote_dir_path}{year_month}.xlsx'  # target path on the remote host
    with paramiko.SSHClient() as ssh:
        # Accept unknown host keys so first-time connections don't fail
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
        # Ensure the target directory exists. exec_command is asynchronous, so
        # block on the exit status before uploading — the original raced the
        # mkdir against the SFTP put and could fail on a fresh month/directory.
        _, mkdir_stdout, _ = ssh.exec_command(f'mkdir -p {remote_dir_path}')
        mkdir_stdout.channel.recv_exit_status()  # wait until mkdir completes
        # Open an SFTP session for the transfer
        with ssh.open_sftp() as sftp:
            logger.info("upload {} to {}", input_path, remote_path)
            sftp.put(input_path, remote_path)
            logger.info("uploaded {}", input_path)
+
+
def data_update():
    """Backfill site_num on house.land_month from house.site_month for the current month."""
    # Join land rows to their site rows on site_id + year_month and copy
    # the site number over; scoped to the accounting month being processed.
    sql = f"""
            update
                house.land_month a
            set
                site_num = b.site_num
            from
                house.site_month b
            where
                a.site_id = b.site_id
                and a.year_month = b.year_month
                and a.year_month = {year_month}
            """
    with psycopg.connect(conninfo=conn_info) as conn, conn.cursor() as cursor:
        logger.info(f"sql: {sql}")
        cursor.execute(sql)
        logger.info(f"update {cursor.rowcount}")
+
+
# Run the pipeline only when executed as a script; importing this module
# must not trigger DB writes, file uploads, or Excel processing.
if __name__ == "__main__":
    data_process()
    data_import()
    upload_file()
    data_update()

+ 405 - 0
house/house-site/house_site.py

@@ -0,0 +1,405 @@
+"""不动产局址数据处理
+"""
+
+import re
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from loguru import logger
+import pandas as pd
+import psycopg
+import subprocess
+import paramiko
+
+# 配置日志记录器,将日志写入文件a.log
+logger.add(sink='a.log')
+ssh_hostname = '172.16.107.4'  # 定义远程主机地址
+ssh_port = 22  # 定义SSH服务的端口号
+ssh_username = 'app'  # 定义登录远程主机的用户名
+ssh_password = '(l4w0ST_'  # 定义登录远程主机的密码
+# 服务器文件夹路径
+remote_dir_path = '/data/history/house/site/'
+# 数据库连接信息
+db_host = "172.16.107.5"  # 数据库主机地址
+db_port = 5432         # 数据库端口号
+db_username = "finance"  # 数据库用户名
+db_password = "Finance@unicom23"  # 数据库密码
+dbname = "financialdb"       # 数据库名称
+conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
+# 获取当前日期,并计算上个月的第一天
+today = datetime.today()
+start_date = today - relativedelta(months=1, day=1)
+year_month = start_date.strftime('%Y%m')
+# 数据文件路径
+input_path = 'data.xlsx'
+# 输出文件路径
+output_path = 'output.csv'
+
+
def data_process():
    """Transform the raw site workbook into output.csv.

    Loads organization and area reference data from PostgreSQL, derives
    second/third-level org codes and city/district ids for every row of the
    Excel input, and writes the result as a CSV whose column order matches
    house.site_month. The nested row-mapper functions capture the reference
    maps below as closures.
    """
    # Reference lookups populated from the database below.
    org_map = {}  # org id -> org row
    third_org_list_map = {}  # second-level org id -> list of child org rows
    area_map = {}  # area id -> area row
    districts_list_map = {}  # city area_id -> list of its district rows

    # Connect to PostgreSQL; rows are returned as dicts.
    with psycopg.connect(
            conninfo=conn_info,
            row_factory=psycopg.rows.dict_row  # rows as dicts
    ) as conn:
        with conn.cursor() as curs:
            # Second-level organizations (grade = 1).
            sql = """
            select * from common.organization where grade = 1
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            second_orgs = curs.fetchall()
            # Prepare an empty child list for every second-level org.
            for x in second_orgs:
                third_org_list_map[x['id']] = []

            # All organizations.
            sql = """
            select * from common.organization
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            orgs = curs.fetchall()
            # Attach each org to its parent's child list and index it by id.
            for x in orgs:
                if x['parent_id'] in third_org_list_map:
                    third_org_list_map[x['parent_id']].append(x)
                org_map[x['id']] = x

            # First-level areas (cities).
            sql = """
            select * from common.area where area_grade = 1 order by area_id
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            cities = curs.fetchall()
            # Prepare an empty district list for every city.
            for x in cities:
                districts_list_map[x['area_id']] = []

            # All areas.
            sql = """
            select * from common.area
            """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            areas = curs.fetchall()
            # Attach each area to its parent's district list, index by area_id.
            for x in areas:
                if x['parent_id'] in districts_list_map:
                    districts_list_map[x['parent_id']].append(x)
                area_map[x['area_id']] = x

    # Read the source workbook.
    df = pd.read_excel(io=input_path)

    # Strip all whitespace from string cells.
    # NOTE(review): DataFrame.map requires pandas >= 2.1 — confirm runtime version.
    df = df.map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)

    # Drop duplicate site ids ('局址ID'), keeping the last occurrence.
    df.drop_duplicates(subset=['局址ID'], keep='last', inplace=True)


    def get_area_no(x):
        """
        Derive the second-level org code from the row's owning units.
        """
        second_unit = x['资产所属单位(二级)']
        third_unit = x['资产所属单位(三级)']
        if '长途通信传输局' == second_unit:
            return '-11'
        # Xiong'an counties administered under Baoding map to a dedicated code.
        if '保定' in second_unit and ('雄县' in third_unit or '容城' in third_unit or '安新' in third_unit):
            return '782'
        for second_org in second_orgs:
            area_name = second_org['name']
            area_no = second_org['id']
            if area_name in second_unit:
                return area_no
        return '-12'  # fallback: unit not recognised


    df['二级组织机构编码'] = df.apply(get_area_no, axis=1)


    def get_area_name(x):
        """
        Resolve the second-level org name from its code.
        """
        area_no = x['二级组织机构编码']
        second_org = org_map[area_no]
        area_name = second_org['name']
        return area_name


    df['二级组织机构名称'] = df.apply(get_area_name, axis=1)


    def get_city_no(x):
        """
        Derive the third-level org code from the owning unit and the
        second-level org resolved above. Special cases are handled first,
        then the parent's child orgs are scanned by name, then per-city
        defaults apply.
        """
        third_unit = x['资产所属单位(三级)']
        area_name = x['二级组织机构名称']
        area_no = x['二级组织机构编码']
        if area_name == '石家庄':
            if '矿区' in third_unit:
                return 'D0130185'
            if '井陉' in third_unit:
                return 'D0130121'
        if area_name == '秦皇岛':
            # NOTE(review): 'D0130185' duplicates the Shijiazhuang 矿区 code
            # above — looks like a possible copy/paste slip; confirm mapping.
            if '北戴河新区' in third_unit:
                return 'D0130185'
            if '北戴河' in third_unit:
                return 'D0130304'
        if area_name == '唐山':
            if '滦县' in third_unit:
                return 'D0130223'
            if '高新技术开发区' in third_unit:
                return 'D0130205'
        if area_name == '邢台':
            if '内丘' in third_unit:
                return 'D0130523'
            if '任泽' in third_unit:
                return 'D0130526'
        if area_name == '邯郸':
            if '峰峰' in third_unit:
                return 'D0130406'
        if area_name == '省机动局':
            # Provincial mobile bureau: one fixed code per city branch.
            if '沧州' in third_unit:
                return 'HECS180'
            if '唐山' in third_unit:
                return 'HECS181'
            if '秦皇岛' in third_unit:
                return 'HECS182'
            if '廊坊' in third_unit:
                return 'HECS183'
            if '张家口' in third_unit:
                return 'HECS184'
            if '邢台' in third_unit:
                return 'HECS185'
            if '邯郸' in third_unit:
                return 'HECS186'
            if '保定' in third_unit:
                return 'HECS187'
            if '石家庄' in third_unit:
                return 'HECS188'
            if '承德' in third_unit:
                return 'HECS189'
            if '衡水' in third_unit:
                return 'HECS720'
            if '雄安' in third_unit:
                return 'HECS728'
            return 'HECS018'
        if '雄安' == area_name:
            # Strip the district prefix so child-org names can match below.
            third_unit = third_unit.replace('雄安新区', '')
        third_org_list = third_org_list_map[area_no]
        for third_org in third_org_list:
            city_name = third_org['name']
            if city_name in third_unit:
                return third_org['id']
        # Per-city default codes when no child org name matched.
        if '沧州' == area_name:
            return 'D0130911'
        if '唐山' == area_name:
            return 'D0130202'
        if '秦皇岛' == area_name:
            return 'D0130302'
        if '廊坊' == area_name:
            return 'D0131000'
        if '张家口' == area_name:
            return 'D0130701'
        if '邢台' == area_name:
            return 'D0130502'
        if '邯郸' == area_name:
            return 'D0130402'
        if '保定' == area_name:
            return 'D0130601'
        if '石家庄' == area_name:
            return 'D0130186'
        if '承德' == area_name:
            return 'D0130801'
        if '衡水' == area_name:
            return 'D0133001'
        if '雄安' == area_name:
            return 'D0130830'
        return 'HE001'  # province-wide fallback


    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)


    def get_city_name(x):
        """
        Resolve the third-level org name from its code.
        """
        city_no = x['三级组织机构编码']
        third_org = org_map[city_no]
        city_name = third_org['name']
        return city_name


    df['三级组织机构名称'] = df.apply(get_city_name, axis=1)


    def get_city_id(x):
        """
        Derive the city id from the standard address and the owning units.
        """
        address = x['标准地址']
        second_unit = x['资产所属单位(二级)']
        third_unit = x['资产所属单位(三级)']
        # Xiong'an (including its Baoding-administered counties) is fixed.
        if '雄安' in address or ('保定' in address and ('雄县' in address or '容城' in address or '安新' in address)):
            return '133100'
        for city in cities:
            area_name = city['short_name']
            area_id = city['area_id']
            if area_name in second_unit:
                return area_id
            if area_name in third_unit:
                return area_id
            if area_name in address:
                return area_id
        return ''


    df['city_id'] = df.apply(get_city_id, axis=1)


    def get_city(x):
        """
        Resolve the city name from the city id.
        """
        city_id = x['city_id']
        area = area_map.get(city_id)
        if pd.notna(area):
            city = area['area_name']
            return city
        return ''


    df['city'] = df.apply(get_city, axis=1)


    def get_district_id(x):
        """
        Derive the district id from the standard address, city name and
        city id, with city-specific overrides handled first.
        """
        address = x['标准地址']
        city = x['city']
        city_id = x['city_id']
        if pd.isna(city) or pd.isna(address):
            return ''
        if city == '石家庄':
            if '矿区' in address:
                return '130107'
            if '井陉' in address:
                return '130121'
        if city == '唐山':
            if '滦县' in address:
                return '130284'
        if city == '邢台':
            if '内邱' in address:
                return '130523'
            if '任县' in address:
                return '130505'
        if city == '雄安':
            # Strip the district prefix so district names can match below.
            address = address.replace('雄安新区', '')
        districts = districts_list_map.get(city_id)
        if not districts:
            return ''
        for district in districts:
            district_name = district['short_name']
            if district_name in address:
                return district['area_id']
        return ''


    df['district_id'] = df.apply(get_district_id, axis=1)


    def get_district(x):
        """
        Resolve the district name from the district id.
        """
        district_id = x['district_id']
        area = area_map.get(district_id)
        if pd.notna(area):
            district = area['area_name']
            return district
        return ''


    df['district'] = df.apply(get_district, axis=1)
    # Prepend the processing month ('年月') as the first column.
    df.insert(0, '年月', year_month)
    # Log a structural summary of the resulting frame.
    print(df.info())
    # Write the result as CSV; the header names match house.site_month.
    df.to_csv(path_or_buf=output_path,
              index=False,
              header=['year_month', 'site_id', 'first_unit', 'second_unit', 'third_unit', 'site_num', 'site_name',
                      'address', 'city_level', 'city_region', 'area_sector', 'has_land', 'area_no', 'area_name', 'city_no',
                      'city_name', 'city_id', 'city', 'district_id', 'district'],
              encoding='utf-8-sig')
+
+
def data_import():
    """Bulk-load the generated CSV into house.site_month via copy.ps1.

    Runs the PowerShell wrapper around psql's \\copy and verifies that the
    script's stdout contains a ``COPY <n>`` line reporting the row count.

    Raises:
        RuntimeError: if no ``COPY <n>`` line is found in stdout.
    """
    # PowerShell helper that wraps psql's \copy command.
    script_path = r"../../copy.ps1"
    table = "house.site_month"  # target table
    # Column order; must match the CSV header written by data_process().
    columns = "year_month,site_id,first_unit,second_unit,third_unit,site_num,site_name,address,city_level,city_region,area_sector,has_land,area_no,area_name,city_no,city_name,city_id,city,district_id,district"
    # Build the command as an argument list (no shell parsing/quoting issues).
    command = [
        "powershell", "-File", script_path,
        "-db_host", db_host, "-db_port", str(db_port),
        "-db_username", db_username, "-db_password", db_password,
        "-dbname", dbname, "-table", table,
        "-filename", output_path, "-columns", columns,
    ]
    logger.info("command: {}", command)
    # Run the script; failure is detected via the COPY count check below,
    # hence check=False.
    completed_process = subprocess.run(
        command,
        check=False,
        text=True,  # decode stdout/stderr to str
        capture_output=True,
    )
    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
                completed_process.stderr)
    # Look for the "COPY <n>" line anywhere in stdout. The original anchored
    # re.match against the whole output, which broke as soon as psql printed
    # any line before the COPY summary.
    p = re.compile(r"^COPY (\d+)$", re.MULTILINE)
    matcher = p.search(completed_process.stdout)
    count = int(matcher.group(1)) if matcher else None
    if count is None:
        raise RuntimeError("导入数据失败")
+
+
def upload_file():
    """Archive the source workbook to the remote history directory over SFTP."""
    destination = f'{remote_dir_path}{year_month}.xlsx'
    with paramiko.SSHClient() as client:
        # Accept unknown host keys automatically so a first-time connect succeeds.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
        # Make sure the target directory exists before transferring.
        client.exec_command(f'mkdir -p {remote_dir_path}')
        with client.open_sftp() as sftp:
            logger.info("upload {} to {}", input_path, destination)
            sftp.put(input_path, destination)
            logger.info("uploaded {}", input_path)
+
+
+data_process()
+data_import()
+upload_file()

+ 363 - 0
house/house-zu-ru-he-tong/house_zu_ru_he_tong.py

@@ -0,0 +1,363 @@
+"""不动产租入合同数据处理
+"""
+
+import re  # 导入正则表达式模块,用于字符串处理
+import decimal  # 导入decimal模块,用于高精度的数值计算
+import subprocess
+from datetime import datetime  # 导入datetime模块,用于日期和时间操作
+from dateutil.relativedelta import relativedelta  # 导入relativedelta模块,用于日期之间的相对差异计算
+from loguru import logger  # 导入loguru模块,用于日志记录
+import pandas as pd  # 导入pandas模块,用于数据处理和分析
+import psycopg  # 导入psycopg模块,用于连接PostgreSQL数据库
+import paramiko
+
+# 配置日志记录器,将日志写入文件a.log
+logger.add(sink='a.log')
+ssh_hostname = '172.16.107.4'  # 定义远程主机地址
+ssh_port = 22  # 定义SSH服务的端口号
+ssh_username = 'app'  # 定义登录远程主机的用户名
+ssh_password = '(l4w0ST_'  # 定义登录远程主机的密码
+# 服务器文件夹路径
+remote_dir_path = '/data/history/house/zu-ru-he-tong/'
+# 数据库连接信息
+db_host = "172.16.107.5"  # 数据库主机地址
+db_port = 5432         # 数据库端口号
+db_username = "finance"  # 数据库用户名
+db_password = "Finance@unicom23"  # 数据库密码
+dbname = "financialdb"       # 数据库名称
+conn_info = f"host='{db_host}' port={db_port} user='{db_username}' password='{db_password}' dbname='{dbname}'"
+# 获取当前日期,并计算上个月的第一天
+today = datetime.today()
+start_date = today - relativedelta(months=1, day=1)
+year_month = start_date.strftime('%Y%m')
+# 数据文件路径
+input_path = 'data.xlsx'
+# 输出文件路径
+output_path = 'output.csv'
+
+
def data_process():
    """Transform the raw rental-contract workbook into output.csv.

    Loads organization and area reference data from PostgreSQL, derives org
    codes/names plus rent-duration and per-square-metre price metrics for
    every contract row, and writes the result as a CSV whose column order
    matches house.rent_in_month. The nested row-mapper functions capture the
    reference data below as closures.
    """
    org_map = {}  # org id -> org row
    third_org_list_map = {}  # second-level org id -> list of child org rows
    area_map = {}  # area id -> area row
    districts_list_map = {}  # city area_id -> list of its district rows
    # Connect to PostgreSQL; rows are returned as dicts.
    with psycopg.connect(
            conninfo=conn_info,
            row_factory=psycopg.rows.dict_row  # rows as dicts
    ) as conn:
        with conn.cursor() as curs:
            # Second-level organizations (grade = 1).
            sql = """
        select * from common.organization where grade = 1
        """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            second_orgs = curs.fetchall()
            for x in second_orgs:
                third_org_list_map[x['id']] = []  # prepare child list per second-level org

            # All organizations.
            sql = """
        select * from common.organization
        """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            orgs = curs.fetchall()
            for x in orgs:
                if x['parent_id'] in third_org_list_map:
                    third_org_list_map[x['parent_id']].append(x)  # attach to parent's child list
                org_map[x['id']] = x  # index org by id

            # First-level areas (cities).
            sql = """
        select * from common.area where area_grade = 1 order by area_id
        """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            cities = curs.fetchall()
            for x in cities:
                districts_list_map[x['area_id']] = []  # prepare district list per city

            # All areas.
            sql = """
        select * from common.area
        """
            logger.info(f"sql: {sql}")
            curs.execute(sql)
            areas = curs.fetchall()
            for x in areas:
                if x['parent_id'] in districts_list_map:
                    districts_list_map[x['parent_id']].append(x)  # attach district to its city
                area_map[x['area_id']] = x  # index area by id
    # Read the Excel data, skipping the first (title) row.
    df = pd.read_excel(io=input_path, skiprows=1)
    # Strip whitespace from string cells in every column except the signing
    # date column ('签订时间').
    # BUG FIX: the original used `x not in ('签订时间')` — without a trailing
    # comma the parentheses are a plain string, so the check was a substring
    # test that could wrongly exclude any column whose name occurs inside
    # '签订时间'. An exact comparison is intended.
    columns_to_clean = [x for x in df.columns if x != '签订时间']
    df[columns_to_clean] = df[columns_to_clean].map(lambda x: re.sub(r'\s+', '', x) if type(x) is str else x)

    def get_area_no(x):
        """Derive the second-level org code from the city-level company name."""
        second_unit = x['使用单位隶属的地市级公司']
        if '长途通信传输局' == second_unit:
            return '-11'
        for second_org in second_orgs:
            area_name = second_org['name']
            area_no = second_org['id']
            if area_name in second_unit:
                return area_no
        return '-12'  # fallback: unit not recognised

    df['二级组织机构编码'] = df.apply(get_area_no, axis=1)

    def get_area_name(x):
        """Resolve the second-level org name from its code."""
        area_no = x['二级组织机构编码']
        second_org = org_map[area_no]
        area_name = second_org['name']
        return area_name

    df['二级组织机构名称'] = df.apply(get_area_name, axis=1)

    def get_city_no(x):
        """Derive the third-level org code from the county-level company name.

        Special cases are handled first, then the parent's child orgs are
        scanned by name, then per-city defaults apply.
        """
        third_unit = x['使用单位隶属的区县级公司']
        area_name = x['二级组织机构名称']
        area_no = x['二级组织机构编码']
        if area_name == '石家庄':
            if '矿区' in third_unit:
                return 'D0130185'
            if '井陉' in third_unit:
                return 'D0130121'
        if area_name == '秦皇岛':
            # NOTE(review): 'D0130185' duplicates the Shijiazhuang 矿区 code
            # above — possibly a copy/paste slip; confirm mapping.
            if '北戴河新区' in third_unit:
                return 'D0130185'
            if '北戴河' in third_unit:
                return 'D0130304'
        if area_name == '唐山':
            if '滦县' in third_unit:
                return 'D0130223'
            if '高新技术开发区' in third_unit:
                return 'D0130205'
        if area_name == '邢台':
            if '内丘' in third_unit:
                return 'D0130523'
            if '任泽' in third_unit:
                return 'D0130526'
        if area_name == '邯郸':
            if '峰峰' in third_unit:
                return 'D0130406'
        if area_name == '省机动局':
            # Provincial mobile bureau: one fixed code per city branch.
            if '沧州' in third_unit:
                return 'HECS180'
            if '唐山' in third_unit:
                return 'HECS181'
            if '秦皇岛' in third_unit:
                return 'HECS182'
            if '廊坊' in third_unit:
                return 'HECS183'
            if '张家口' in third_unit:
                return 'HECS184'
            if '邢台' in third_unit:
                return 'HECS185'
            if '邯郸' in third_unit:
                return 'HECS186'
            if '保定' in third_unit:
                return 'HECS187'
            if '石家庄' in third_unit:
                return 'HECS188'
            if '承德' in third_unit:
                return 'HECS189'
            if '衡水' in third_unit:
                return 'HECS720'
            if '雄安' in third_unit:
                return 'HECS728'
            return 'HECS018'
        if '雄安' == area_name:
            # Strip the district prefix so child-org names can match below.
            third_unit = third_unit.replace('雄安新区', '')
        third_org_list = third_org_list_map[area_no]
        for third_org in third_org_list:
            city_name = third_org['name']
            if city_name in third_unit:
                return third_org['id']
        # Per-city default codes when no child org name matched.
        if '沧州' == area_name:
            return 'D0130911'
        if '唐山' == area_name:
            return 'D0130202'
        if '秦皇岛' == area_name:
            return 'D0130302'
        if '廊坊' == area_name:
            return 'D0131000'
        if '张家口' == area_name:
            return 'D0130701'
        if '邢台' == area_name:
            return 'D0130502'
        if '邯郸' == area_name:
            return 'D0130402'
        if '保定' == area_name:
            return 'D0130601'
        if '石家庄' == area_name:
            return 'D0130186'
        if '承德' == area_name:
            return 'D0130801'
        if '衡水' == area_name:
            return 'D0133001'
        if '雄安' == area_name:
            return 'D0130830'
        return 'HE001'  # province-wide fallback

    df['三级组织机构编码'] = df.apply(get_city_no, axis=1)

    def get_city_name(x):
        """Resolve the third-level org name from its code."""
        city_no = x['三级组织机构编码']
        third_org = org_map[city_no]
        city_name = third_org['name']
        return city_name

    df['三级组织机构名称'] = df.apply(get_city_name, axis=1)

    def get_rent_months(x):
        """Compute the rent term in months from the start and end dates.

        Any partial trailing month counts as a full month. Returns '' when
        either date is missing (downstream checks treat '' as falsy).
        """
        rent_start_date = x['租入开始时间(合同生效时间)']
        rent_end_date = x['租入终止时间(合同终止时间)']
        if pd.isna(rent_start_date) or pd.isna(rent_end_date):
            return ''
        rent_start_date = pd.to_datetime(rent_start_date)
        rent_end_date = pd.to_datetime(rent_end_date)
        delta = relativedelta(rent_end_date, rent_start_date)
        rent_months = delta.years * 12 + delta.months + (1 if delta.days > 0 else 0)
        return rent_months

    df['租期月数'] = df.apply(get_rent_months, axis=1)

    def get_gross_amount_month(x):
        """Monthly gross (tax-inclusive) amount = total amount / rent months."""
        gross_amount = x['合同总金额(含税)(元)']
        rent_months = x['租期月数']
        if pd.notna(gross_amount) and pd.notna(rent_months) and rent_months and rent_months > 0:
            return (decimal.Decimal(gross_amount) / decimal.Decimal(rent_months)).quantize(decimal.Decimal('0.00'))
        return None

    df['月含税合同额'] = df.apply(get_gross_amount_month, axis=1)

    def get_unit_price(x):
        """Price per square metre per month = monthly amount / rented area."""
        building_area = x['租入建筑面积(平米)']
        gross_amount_month = x['月含税合同额']
        if pd.notna(building_area) and pd.notna(gross_amount_month) and building_area > 0 and gross_amount_month > 0:
            return (decimal.Decimal(gross_amount_month) / decimal.Decimal(building_area)).quantize(
                decimal.Decimal('0.00'))
        return None

    df['每平米单价'] = df.apply(get_unit_price, axis=1)

    def get_rent_years(x):
        """Rent term in years = rent months / 12, rounded to 2 decimals."""
        rent_months = x['租期月数']
        if pd.isna(rent_months) or not rent_months:
            return None
        return (decimal.Decimal(rent_months) / decimal.Decimal('12')).quantize(decimal.Decimal('0.00'))

    df['rent_years'] = df.apply(get_rent_years, axis=1)

    def get_unit_price2(x):
        """Alternative unit price = total amount / area / years / 12."""
        gross_amount = x['合同总金额(含税)(元)']
        building_area = x['租入建筑面积(平米)']
        rent_years = x['rent_years']
        if pd.notna(building_area) and pd.notna(gross_amount) and pd.notna(
                rent_years) and building_area > 0 and gross_amount > 0 and rent_years > 0:
            return (decimal.Decimal(gross_amount) / decimal.Decimal(building_area) / decimal.Decimal(
                rent_years) / decimal.Decimal(12)).quantize(decimal.Decimal('0.00'))
        return None

    df['unit_price2'] = df.apply(get_unit_price2, axis=1)


    def remove_extra_dots(s):
        """Keep the first '.' in a coordinate string and drop any later ones.

        NOTE(review): assumes the coordinate cells are strings (or NaN);
        a numeric cell would make re.search raise TypeError — confirm the
        upstream dtype.
        """
        if pd.isna(s) or not s:
            return None
        match = re.search(r'\.', s)
        if match:
            first_dot_index = match.start()
            return s[:first_dot_index + 1] + s[first_dot_index + 1:].replace('.', '')
        else:
            return s


    df['地址经度坐标'] = df['地址经度坐标'].map(remove_extra_dots)
    df['地址纬度坐标'] = df['地址纬度坐标'].map(remove_extra_dots)
    df.insert(0, '年月', year_month)  # prepend the processing month column
    # Log a structural summary of the resulting frame.
    print(df.info())
    # Write the result as CSV; the header names match house.rent_in_month.
    df.to_csv(path_or_buf=output_path,
              index=False,
              header=['year_month', 'serial_no', 'data_num', 'house_name', 'owner_type', 'rent_type', 'first_address',
                      'second_address', 'third_address', 'fourth_address', 'city_region', 'area_sector', 'lng', 'lat',
                      'building_area', 'usable_area', 'investor', 'unit_level', 'first_unit', 'second_unit',
                      'third_unit', 'field', 'use_type', 'use_description', 'building_area_self_use',
                      'building_area_sublet', 'first_rent_date', 'contract_no', 'contract_name', 'contract_type',
                      'sign_date', 'lessee', 'lessor', 'gross_amount', 'vat', 'rent_start_date', 'rent_end_date',
                      'undertaking_department', 'undertaker', 'phone', 'amount_accrued', 'amount_reimbursement',
                      'contract_nature', 'contract_status', 'area_no', 'area_name', 'city_no', 'city_name',
                      'rent_months', 'gross_amount_month', 'unit_price', 'rent_years', 'unit_price2'],
              encoding='utf-8-sig')
+
+
def data_import():
    """Bulk-load the generated CSV into house.rent_in_month via copy.ps1.

    Runs the PowerShell wrapper around psql's \\copy and verifies that the
    script's stdout contains a ``COPY <n>`` line reporting the row count.

    Raises:
        RuntimeError: if no ``COPY <n>`` line is found in stdout.
    """
    # PowerShell helper that wraps psql's \copy command.
    script_path = r"../../copy.ps1"
    table = "house.rent_in_month"  # target table
    # Column order; must match the CSV header written by data_process().
    columns = "year_month,serial_no,data_num,house_name,owner_type,rent_type,first_address,second_address,third_address,fourth_address,city_region,area_sector,lng,lat,building_area,usable_area,investor,unit_level,first_unit,second_unit,third_unit,field,use_type,use_description,building_area_self_use,building_area_sublet,first_rent_date,contract_no,contract_name,contract_type,sign_date,lessee,lessor,gross_amount,vat,rent_start_date,rent_end_date,undertaking_department,undertaker,phone,amount_accrued,amount_reimbursement,contract_nature,contract_status,area_no,area_name,city_no,city_name,rent_months,gross_amount_month,unit_price,rent_years,unit_price2"
    # Build the command as an argument list (no shell parsing/quoting issues).
    command = [
        "powershell", "-File", script_path,
        "-db_host", db_host, "-db_port", str(db_port),
        "-db_username", db_username, "-db_password", db_password,
        "-dbname", dbname, "-table", table,
        "-filename", output_path, "-columns", columns,
    ]
    logger.info("command: {}", command)
    # Run the script; failure is detected via the COPY count check below,
    # hence check=False.
    completed_process = subprocess.run(
        command,
        check=False,
        text=True,  # decode stdout/stderr to str
        capture_output=True,
    )
    logger.info("导入结果:\n{}\n{}\n{}", completed_process.returncode, completed_process.stdout,
                completed_process.stderr)
    # Look for the "COPY <n>" line anywhere in stdout. The original anchored
    # re.match against the whole output, which broke as soon as psql printed
    # any line before the COPY summary.
    p = re.compile(r"^COPY (\d+)$", re.MULTILINE)
    matcher = p.search(completed_process.stdout)
    count = int(matcher.group(1)) if matcher else None
    if count is None:
        raise RuntimeError("导入数据失败")
+
+
def upload_file():
    """Archive the source workbook to the remote history directory over SFTP."""
    destination = f'{remote_dir_path}{year_month}.xlsx'
    with paramiko.SSHClient() as client:
        # Accept unknown host keys automatically so a first-time connect succeeds.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(ssh_hostname, port=ssh_port, username=ssh_username, password=ssh_password)
        # Make sure the target directory exists before transferring.
        client.exec_command(f'mkdir -p {remote_dir_path}')
        with client.open_sftp() as sftp:
            logger.info("upload {} to {}", input_path, destination)
            sftp.put(input_path, destination)
            logger.info("uploaded {}", input_path)
+
+
+data_process()
+data_import()
+upload_file()

+ 50 - 0
readme.md

@@ -0,0 +1,50 @@
+# 财务数据处理
+
+**注意事项:不动产局址数据需要在建筑、土地之前处理**
+
+```
+│  copy.ps1 psql导入csv脚本
+│  test_token.py 生成测试token
+│
+├─car
+│  ├─car-chao-bao
+│  │      car_chao_bao.py 车辆超保数据处理
+│  │
+│  ├─car-guo-jian
+│  │      car_guo_jian.py 车辆过检数据处理
+│  │
+│  ├─car-li-cheng-day-missing
+│  │      car_li_cheng_day_missing.py 睿行车辆行驶里程日统计缺失数据日期
+│  │
+│  ├─car-wei-zhang
+│  │      car_wei_zhang.py 车辆违章数据处理
+│  │
+│  ├─car-yue-jie-missing
+│  │      car_yue_jie_missing.py 睿行车辆越界报警日统计缺失数据日期
+│  │
+│  └─car-zu-lin
+│          car_zu_lin.py 车辆租赁合同数据处理
+│
+├─doc
+│      sql.md
+│
+└─house
+    ├─house-abnormal-data
+    │  │  house_abnormal_data.py 不动产异常数据稽核数据处理
+    │
+    ├─house-building
+    │      house_building.py 不动产建筑数据处理
+    │
+    ├─house-fang-jian
+    │      house_fang_jian.py 不动产房间数据处理
+    │
+    ├─house-land
+    │      house_land.py 不动产土地数据处理
+    │
+    ├─house-site
+    │      house_site.py 不动产局址数据处理
+    │
+    └─house-zu-ru-he-tong
+            house_zu_ru_he_tong.py 不动产租入合同数据处理
+```
+

+ 68 - 0
test_token.py

@@ -0,0 +1,68 @@
+"""生成测试token
+"""
+
+import base64  # 导入base64模块,用于编码和解码数据
+import json  # 导入json模块,用于处理JSON格式数据
+from Crypto.Cipher import AES  # 导入AES加密模块
+from datetime import datetime  # 导入datetime模块,用于获取当前时间
+
+# 定义加密密钥
+key = "2na$$PdV9AW8b#CS"
+# 定义一个常量字符串,用于替换加密后的Base64字符串中的特殊字符“+”
+ADD = "/add/"
+
+
def encrypt(str, key):
    """AES-ECB-encrypt *str* with *key* and return a Base64 token.

    The plaintext is PKCS7-padded, encrypted with AES in ECB mode, Base64
    encoded, and every '+' in the encoding is replaced by the ADD marker.

    NOTE(review): the parameter name shadows the builtin ``str``; kept
    unchanged for interface compatibility.

    Args:
        str: plaintext to encrypt.
        key: AES key (its UTF-8 encoding must be a valid AES key length).

    Returns:
        The encrypted token as a Base64-derived string.
    """
    padded = pad(str.encode('utf-8'), AES.block_size)
    ciphertext = AES.new(key.encode('utf-8'), AES.MODE_ECB).encrypt(padded)
    token = base64.b64encode(ciphertext).decode('utf-8')
    # b64encode emits no newlines, but strip defensively as before, then
    # substitute '+' with the ADD marker.
    return token.replace('\n', '').replace('+', ADD)
+
+
def pad(data, block_size):
    """Apply PKCS7 padding so the result length is a multiple of *block_size*.

    Appends N bytes, each of value N, where N = block_size - len(data) %
    block_size; a full extra block is appended when the input is already
    aligned.

    Args:
        data: bytes to pad.
        block_size: cipher block size (16 for AES).

    Returns:
        The padded bytes.
    """
    pad_len = block_size - len(data) % block_size
    return data + bytes([pad_len] * pad_len)
+
+
# Token payload expected by the receiving gateway.
s = {
    "APP_ID": "FINANCE",  # application id
    "TIME_STAMP": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),  # issue time
    "REQUEST_URL": "",  # request URL (left empty here)
    "LOGIN_ID": "test",  # login id
    "EXPIRE_TIME": 3600,  # token lifetime in seconds
}

# Serialise the payload to JSON, encrypt it, and print the resulting token.
print(encrypt(json.dumps(s), key))