futu_opend/startup/phase1.rs
1//! v1.4.110 Layer 3 A: startup Phase 1 — bootstrap前置 (logging / metrics /
2//! 守护设施). 抽自原 `mod.rs::run_daemon` 33..219 行段.
3//!
4//! Phase 1 副作用 (按顺序):
5//! 1. keys-file 预 dry-run 验证 (REST/gRPC/WS) → fail-closed 早 abort
6//! 2. 初始化日志 (json vs plain + audit guard)
7//! 3. `tighten_secret_files_at_startup()` 把 0644 secret 文件收紧到 0600
8//! 4. 安装全局 panic hook (tracing + crash log + exit 101)
9//! 5. install futu_auth metrics registry
10//! 6. 构造 shared `RuntimeCounters`
11//! 7. 计算 `listen_addr` 并打印 "starting" 日志
12//! 8. WARN: moomoo + 显式 `--login-region`
13//! 9. 启动前端口冲突探测
14//!
15//! `--tz` / TOML `tz` 必须在 Tokio runtime 创建前应用,由
16//! [`super::apply_pre_runtime_tz`] 在 sync `main()` 里调用。
17
18use anyhow::Result;
19use std::sync::Arc;
20
21use crate::cli::Platform;
22use crate::config::RuntimeConfig;
23use crate::crash_log::write_crash_log_file;
24
/// Output of startup Phase 1.
///
/// The caller must hold this value until the process exits; otherwise
/// dropping the audit guard lets the tracing-appender background thread shut
/// down early and lose events.
pub(super) struct Phase1Out {
    /// Audit-log guard. Dropping it shuts tracing-appender down, so the
    /// caller must keep it alive.
    pub(super) _audit_guard: Option<tracing_appender::non_blocking::WorkerGuard>,
    /// Shared `RuntimeCounters`: REST and gRPC use the same instance so the
    /// rate window is consistent across surfaces.
    pub(super) shared_counters: Arc<futu_auth::RuntimeCounters>,
    /// `"ip:port"` string, reused later by the server / WS / REST / gRPC / telnet.
    pub(super) listen_addr: String,
    /// REST keys-file path copied from the config (used in Phase 4).
    pub(super) rest_keys_file: Option<std::path::PathBuf>,
    /// WS keys-file path copied from the config (used in Phase 4).
    pub(super) ws_keys_file: Option<std::path::PathBuf>,
    /// gRPC keys-file path copied from the config (used in Phase 4).
    pub(super) grpc_keys_file: Option<std::path::PathBuf>,
    /// Whether a TCP listener without auth is allowed (decided in Phase 4).
    pub(super) allow_tcp_unauthenticated: bool,
}
41
42pub(super) fn is_valid_iana_tz_name(tz: &str) -> bool {
43 if tz == "UTC" {
44 return true;
45 }
46 // Local shape guard for `--tz` before writing the process-wide TZ env var.
47 // We intentionally avoid a chrono_tz parse dependency here (see v1.4.87
48 // --tz decision) and accept only IANA-style slash-separated ASCII names.
49 // The 128-byte cap is a defensive config-boundary limit, not a protocol
50 // value; revisit if IANA tzdb ever grows names beyond this shape.
51 if tz.is_empty() || tz.len() > 128 || tz.starts_with('/') || tz.ends_with('/') {
52 return false;
53 }
54
55 let mut parts = 0usize;
56 for part in tz.split('/') {
57 parts += 1;
58 if part.is_empty() || part == "." || part == ".." {
59 return false;
60 }
61 if part.contains('.') {
62 return false;
63 }
64 if !part
65 .bytes()
66 .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-' | b'+'))
67 {
68 return false;
69 }
70 }
71
72 parts >= 2
73}
74
75pub(super) fn apply_pre_runtime_tz(config: &RuntimeConfig) {
76 if let Some(tz) = &config.tz {
77 // 轻度校验 IANA name 格式 (不跑 chrono_tz parse 避免 dep 膨胀)
78 if !is_valid_iana_tz_name(tz) {
79 eprintln!(
80 "error: --tz 无效 IANA timezone '{tz}'. 示例: Asia/Hong_Kong, America/New_York, UTC"
81 );
82 std::process::exit(2);
83 }
84 // SAFETY: production main calls this before constructing the Tokio
85 // runtime, so no runtime worker thread can concurrently read the
86 // process environment. Tests only cover the shape validator; they do
87 // not call this helper concurrently.
88 unsafe {
89 std::env::set_var("TZ", tz);
90 }
91 eprintln!("ℹ️ TZ set to '{tz}' via --tz flag / TOML tz (v1.4.87 #3 G1)");
92 }
93}
94
/// Chooses the socket address to probe when checking whether `port` is
/// already taken for the given bind address.
///
/// * An unspecified bind address (`0.0.0.0` / `::`) is probed through the
///   loopback address of the same IP family.
/// * Any other parsable IP address is probed as-is.
/// * A `bind_ip` that does not parse as an IP address falls back to IPv4
///   loopback.
fn port_probe_addr(bind_ip: &str, port: u16) -> std::net::SocketAddr {
    use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};

    let probe_ip = match bind_ip.parse::<IpAddr>() {
        // A wildcard IPv6 bind is reachable through `::1`.
        Ok(IpAddr::V6(v6)) if v6.is_unspecified() => IpAddr::V6(Ipv6Addr::LOCALHOST),
        // Concrete address of either family: probe exactly that address.
        Ok(ip) if !ip.is_unspecified() => ip,
        // Wildcard IPv4 bind, or a value that is not an IP literal at all.
        _ => IpAddr::V4(Ipv4Addr::LOCALHOST),
    };
    SocketAddr::new(probe_ip, port)
}
107
108pub(super) async fn run_phase1(config: &RuntimeConfig) -> Result<Phase1Out> {
109 // codex 0547 F6 (P3): 安全字段从 config 读 (而非 args.*) — TOML 也能
110 // override 这些 (与 docs "字段与 CLI 一致" 契约对齐).
111 let rest_keys_file = config.rest_keys_file.clone();
112 let ws_keys_file = config.ws_keys_file.clone();
113 let grpc_keys_file = config.grpc_keys_file.clone();
114 let audit_log = config.audit_log.clone();
115 let allow_tcp_unauthenticated = config.allow_tcp_unauthenticated;
116
117 // v1.4.104 external reviewer P1-003 (P1) fix: keys-file 解析时序前移到 broker auth /
118 // SMS 之前. 之前 schema 错的 keys-file 要等到 surface server 启动时才报错
119 // (broker auth + SMS 之后 7+ 秒), 配置错应该启动就发现.
120 //
121 // 这里只**预解析 + dry-run 验证**, 不持久化结果 (实际 Arc 在 surface server
122 // 启动时由 KeyStore::load 重新读+解析, 因为 SIGHUP reload 也走那条路径).
123 // dry-run 失败 → 立即 abort, 不进 broker auth.
124 for (label, path_opt) in [
125 ("REST", &rest_keys_file),
126 ("gRPC", &grpc_keys_file),
127 ("WS", &ws_keys_file),
128 ] {
129 if let Some(path) = path_opt {
130 match futu_auth::KeyStore::load(path) {
131 Ok(ks) => {
132 tracing::info!(
133 surface = label,
134 path = %path.display(),
135 keys_loaded = ks.len(),
136 "v1.4.104 external report P1-003 (P1): {} keys file pre-validated OK \
137 (broker auth not yet started)",
138 label
139 );
140 }
141 Err(e) => {
142 tracing::error!(
143 surface = label,
144 error = %e,
145 path = %path.display(),
146 "v1.4.104 external report P1-003 (P1): {} keys file pre-validation FAILED — \
147 abort before broker auth / SMS to fail-closed early",
148 label
149 );
150 return Err(anyhow::anyhow!(
151 "v1.4.104 external report P1-003 (P1) fix: {} keys file at {} failed schema \
152 validation: {e}. abort before broker auth / SMS. fix the keys \
153 file then restart.",
154 label,
155 path.display()
156 ));
157 }
158 }
159 }
160 }
161 // codex 0547 F6 (P3): merge_config 已提前到 args 仍可用阶段 (~line 1006);
162 // 此处不再重复 capture inject_auth_failure_every / merge_config.
163 // dev-flags 在 cfg(feature = "dev-flags") 路径上, capture 已在 args 解析后立即做.
164
165 // 1. 初始化日志(--log-level 参数生效,RUST_LOG 环境变量优先)
166 // audit 日志 guard 必须活到进程退出,否则 tracing-appender 后台线程可能丢事件。
167 let _audit_guard = if config.json_log {
168 // v1.4.27(BUG-7,加拿大同事 v1.4.26 回归测试发现):`--audit-log` 和
169 // `--json-log` 一起用时,之前是**静默忽略** `--audit-log`(只打 warn
170 // 到 stderr)、创建空文件;用户会误以为"没有审计事件发生"。现在改
171 // 硬失败 → 用户必须显式二选一,避免审计文件空导致的合规事故。
172 if audit_log.is_some() {
173 eprintln!(
174 "error: --audit-log and --json-log are mutually exclusive.\n\
175 - --json-log: entire stderr as JSONL (full event stream)\n\
176 - --audit-log: only target=futu_audit events as JSONL to a file\n\
177 choose one. If you need both machine-readable stderr AND a separate audit \
178 file, open an issue — today's layer composition doesn't support it."
179 );
180 std::process::exit(2);
181 }
182 futu_core::log::init_json_logging_with_level(&config.log_level);
183 None
184 } else {
185 match futu_core::log::init_logging_with_audit(&config.log_level, audit_log.as_deref()) {
186 Ok(guard) => {
187 if let (Some(path), Some(_)) = (audit_log.as_ref(), guard.as_ref()) {
188 tracing::info!(
189 path = %path.display(),
190 "audit JSONL logger enabled (target=futu_audit → file)"
191 );
192 }
193 guard
194 }
195 Err(e) => {
196 eprintln!("warning: failed to init audit log: {e}");
197 futu_core::log::init_logging_with_level(&config.log_level);
198 None
199 }
200 }
201 };
202
203 // v1.4.102 codex 27 F11 (P2) fix: startup chmod migration 移到无条件 path,
204 // 不再放在 login 分支里. 升级 (v1.4.101 及以前) 用户的
205 // `~/.futu-opend-rs/credentials-*.json` / `device-*.dat` 默认是 0644,
206 // 多用户机其他本地用户能读 tgtgt / web_sig. 此 fn 扫 ~/.futu-opend-rs/ 把
207 // secret 文件统一收紧到 0600.
208 //
209 // **历史**: BUG-012 修法 v1.4.102 ship 时把此 call 放在 `if let
210 // (Some(account), Some(password))` 分支内 — 无登录凭据 / 只跑 admin shell
211 // / 凭据解析失败的 daemon 都不执行 migration. codex 27 F11 audit 抓到.
212 //
213 // best-effort: chmod 失败 warn but don't fail (失败 != 凭据本身失效).
214 futu_backend::auth::tighten_secret_files_at_startup();
215
216 // v1.4.41 (P3.1 第二阶段): tracing subscriber 已装,重装 panic hook
217 // 让 panic 走 tracing::error!(audit log / JSON log / stderr 三出)。
218 std::panic::set_hook(Box::new(|info| {
219 let location = info
220 .location()
221 .map(|l| format!("{}:{}", l.file(), l.line()))
222 .unwrap_or_else(|| "<unknown>".to_string());
223 let payload = info
224 .payload()
225 .downcast_ref::<&str>()
226 .copied()
227 .or_else(|| info.payload().downcast_ref::<String>().map(|s| s.as_str()))
228 .unwrap_or("<non-string panic payload>");
229 let thread = std::thread::current()
230 .name()
231 .unwrap_or("<unnamed>")
232 .to_string();
233 tracing::error!(
234 target: "panic",
235 location = %location,
236 payload = %payload,
237 thread = %thread,
238 "PANIC caught by global hook"
239 );
240 eprintln!("PANIC at {location}: {payload} (thread={thread})");
241 // v1.4.97 P1-D-D: also write dated crash log to disk for forensics.
242 // Same as pre-tracing hook — covers panics that occur after tracing
243 // subscriber is up.
244 write_crash_log_file(info);
245 // v1.4.97 P1-D-E: propagate any panic → process exit so systemd
246 // Restart=on-failure can restart the daemon. Without this, tokio
247 // task panic silently kills only the task, leaving daemon zombie
248 // (REST/gRPC/WS/telnet/push tasks die one-by-one with main alive).
249 //
250 // Aligned with C++ NNCrashCenter `exit(NN_ExitCode_Crash)` pattern
251 // (NNCrashCenter_Mac.cpp:99; per agent 9 finding).
252 // `exit(101)` matches Rust panic default exit code; not used in
253 // test build (cfg(not(test))) to avoid disrupting unit tests.
254 #[cfg(not(test))]
255 std::process::exit(101);
256 }));
257
258 // 2. install 全局 metrics registry(让 audit::* 的 counter hook 和
259 // REST `/metrics` 端点能对齐同一套计数器)
260 futu_auth::metrics::install(std::sync::Arc::new(futu_auth::MetricsRegistry::default()));
261
262 // 2.1 共享 RuntimeCounters:REST / gRPC 共用一个,这样 rate limit 和日累计
263 // 跨接口一致(同一把 key 通过 REST 下 3 单、gRPC 下 3 单,rate 窗口
264 // 看到 6 单,不是各看 3 单)
265 let shared_counters = std::sync::Arc::new(futu_auth::RuntimeCounters::new());
266
267 let listen_addr = format!("{}:{}", config.ip, config.port);
268 tracing::info!(addr = %listen_addr, "starting FutuOpenD Rust Gateway");
269
270 // v1.4.42 (external reviewer v1.4.40 报告 P3.5 澄清): moomoo 账户 + 显式 --login-region
271 // → WARN 提示此 flag 对 moomoo 账户 noop(不影响 platform IP 选择)。
272 //
273 // 原因:login_region 只用在 CN 手机号账户的 salt URL `region_no` 参数,
274 // platform IP 池按 user_attribution(CN/HK/US/SG/AU/JP)从 conn_points
275 // 选,不按 login_region 切(3 个 region 代号 gz/sh/hk 是 CN 大陆分区,
276 // 和 platform IP 池没对应关系)。external reviewer v1.4.40 报告观察 "三种 region 下
277 // platform IP 相同" 是预期行为,不是 bug。
278 //
279 // v1.4.40 计划中"加 WARN"实际没加(CHANGELOG 声称但代码漏),v1.4.42 补上。
280 if config.login_region_explicit && matches!(config.platform, Platform::Moomoo) {
281 tracing::warn!(
282 login_region = %config.login_region,
283 platform = "moomoo",
284 "--login-region={region} is a NO-OP for moomoo accounts — flag only \
285 applies to --platform futunn + CN phone-number login. moomoo accounts \
286 route via user_attribution automatically. Observed \"same platform IP \
287 across gz/sh/hk\" is expected behavior. Remove --login-region to silence.",
288 region = config.login_region
289 );
290 }
291
292 // v1.4.16:端口冲突检测——在登录之前先检查核心端口是否已被占用。
293 // 多实例并行(如同时跑 futunn + moomoo)是预期场景,但用户容易忘记改端口,
294 // 导致 futucli 连到了旧实例看到错账号的数据(同事 bug report #6)。
295 {
296 let ports_to_check: Vec<(&str, u16)> = std::iter::once(("FTAPI", config.port))
297 .chain(config.rest_port.map(|p| ("REST", p)))
298 .chain(config.grpc_port.map(|p| ("gRPC", p)))
299 .chain(config.websocket_port.map(|p| ("WebSocket", p)))
300 .chain(config.telnet_port.map(|p| ("Telnet", p)))
301 .collect();
302 for (name, port) in &ports_to_check {
303 let addr = port_probe_addr(&config.ip, *port);
304 if std::net::TcpStream::connect_timeout(&addr, std::time::Duration::from_millis(200))
305 .is_ok()
306 {
307 tracing::warn!(
308 name,
309 port,
310 "⚠️ port {port} ({name}) is already in use! \
311 Another futu-opend or other process may be running. \
312 Use --port / --rest-port / --grpc-port to avoid conflict."
313 );
314 }
315 }
316 }
317
318 Ok(Phase1Out {
319 _audit_guard,
320 shared_counters,
321 listen_addr,
322 rest_keys_file,
323 ws_keys_file,
324 grpc_keys_file,
325 allow_tcp_unauthenticated,
326 })
327}
328
329#[cfg(test)]
330mod tests;