Skip to main content

futu_opend/startup/
phase1.rs

1//! v1.4.110 Layer 3 A: startup Phase 1 — bootstrap前置 (logging / metrics /
2//! 守护设施). 抽自原 `mod.rs::run_daemon` 33..219 行段.
3//!
4//! Phase 1 副作用 (按顺序):
5//! 1. keys-file 预 dry-run 验证 (REST/gRPC/WS) → fail-closed 早 abort
6//! 2. 初始化日志 (json vs plain + audit guard)
7//! 3. `tighten_secret_files_at_startup()` 把 0644 secret 文件收紧到 0600
8//! 4. 安装全局 panic hook (tracing + crash log + exit 101)
9//! 5. install futu_auth metrics registry
10//! 6. 构造 shared `RuntimeCounters`
11//! 7. 计算 `listen_addr` 并打印 "starting" 日志
12//! 8. WARN: moomoo + 显式 `--login-region`
13//! 9. 启动前端口冲突探测
14//!
15//! `--tz` / TOML `tz` 必须在 Tokio runtime 创建前应用,由
16//! [`super::apply_pre_runtime_tz`] 在 sync `main()` 里调用。
17
18use anyhow::Result;
19use std::sync::Arc;
20
21use crate::cli::Platform;
22use crate::config::RuntimeConfig;
23use crate::crash_log::write_crash_log_file;
24
/// Output of startup Phase 1.
///
/// The caller must hold this value until the process exits: dropping the
/// audit guard shuts the tracing-appender background thread down early and
/// loses events.
pub(super) struct Phase1Out {
    /// Audit log guard. Dropping it closes tracing-appender, so the caller
    /// must keep it alive for the lifetime of the process.
    pub(super) _audit_guard: Option<tracing_appender::non_blocking::WorkerGuard>,
    /// Shared `RuntimeCounters` (REST and gRPC use the same instance so the
    /// rate window stays consistent across surfaces).
    pub(super) shared_counters: Arc<futu_auth::RuntimeCounters>,
    /// "ip:port" string, reused later by server / WS / REST / gRPC / telnet.
    pub(super) listen_addr: String,
    /// REST keys file path copied from the config (used in Phase 4).
    pub(super) rest_keys_file: Option<std::path::PathBuf>,
    /// WS keys file path copied from the config (used in Phase 4).
    pub(super) ws_keys_file: Option<std::path::PathBuf>,
    /// gRPC keys file path copied from the config (used in Phase 4).
    pub(super) grpc_keys_file: Option<std::path::PathBuf>,
    /// Whether a TCP listener without auth is allowed (decided in Phase 4).
    pub(super) allow_tcp_unauthenticated: bool,
}
41
42pub(super) fn is_valid_iana_tz_name(tz: &str) -> bool {
43    if tz == "UTC" {
44        return true;
45    }
46    // Local shape guard for `--tz` before writing the process-wide TZ env var.
47    // We intentionally avoid a chrono_tz parse dependency here (see v1.4.87
48    // --tz decision) and accept only IANA-style slash-separated ASCII names.
49    // The 128-byte cap is a defensive config-boundary limit, not a protocol
50    // value; revisit if IANA tzdb ever grows names beyond this shape.
51    if tz.is_empty() || tz.len() > 128 || tz.starts_with('/') || tz.ends_with('/') {
52        return false;
53    }
54
55    let mut parts = 0usize;
56    for part in tz.split('/') {
57        parts += 1;
58        if part.is_empty() || part == "." || part == ".." {
59            return false;
60        }
61        if part.contains('.') {
62            return false;
63        }
64        if !part
65            .bytes()
66            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-' | b'+'))
67        {
68            return false;
69        }
70    }
71
72    parts >= 2
73}
74
75pub(super) fn apply_pre_runtime_tz(config: &RuntimeConfig) {
76    if let Some(tz) = &config.tz {
77        // 轻度校验 IANA name 格式 (不跑 chrono_tz parse 避免 dep 膨胀)
78        if !is_valid_iana_tz_name(tz) {
79            eprintln!(
80                "error: --tz 无效 IANA timezone '{tz}'. 示例: Asia/Hong_Kong, America/New_York, UTC"
81            );
82            std::process::exit(2);
83        }
84        // SAFETY: production main calls this before constructing the Tokio
85        // runtime, so no runtime worker thread can concurrently read the
86        // process environment. Tests only cover the shape validator; they do
87        // not call this helper concurrently.
88        unsafe {
89            std::env::set_var("TZ", tz);
90        }
91        eprintln!("ℹ️  TZ set to '{tz}' via --tz flag / TOML tz (v1.4.87 #3 G1)");
92    }
93}
94
/// Builds the socket address used to probe whether `port` is already taken.
///
/// * A wildcard bind address (`0.0.0.0` / `::`) is probed through the
///   loopback address of the same family, since the wildcard itself is a
///   bind address rather than a destination to connect to.
/// * Any other parsable IP is probed as-is.
/// * A `bind_ip` that does not parse as an IP address falls back to
///   `127.0.0.1`.
fn port_probe_addr(bind_ip: &str, port: u16) -> std::net::SocketAddr {
    use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};

    let probe_ip = match bind_ip.parse::<IpAddr>() {
        // IPv6 wildcard -> IPv6 loopback.
        Ok(IpAddr::V6(v6)) if v6.is_unspecified() => IpAddr::V6(Ipv6Addr::LOCALHOST),
        // Concrete address of either family -> probe exactly that address.
        Ok(concrete) if !concrete.is_unspecified() => concrete,
        // IPv4 wildcard, or text that is not an IP at all -> IPv4 loopback.
        _ => IpAddr::V4(Ipv4Addr::LOCALHOST),
    };

    SocketAddr::new(probe_ip, port)
}
107
/// Runs startup Phase 1 (bootstrap prerequisites) and returns the state that
/// later phases need.
///
/// Side effects, in order:
/// 1. Dry-run validation of the REST / gRPC / WS keys files.
/// 2. Logging initialisation (JSON vs plain, optional audit log guard).
/// 3. `tighten_secret_files_at_startup()` permission migration.
/// 4. Installation of the global panic hook.
/// 5. Installation of the `futu_auth` metrics registry.
/// 6. Construction of the shared `RuntimeCounters`.
/// 7. Computation of `listen_addr` and the "starting" log line.
/// 8. WARN for moomoo + explicit `--login-region`.
/// 9. Pre-start port conflict probe (warn only).
///
/// # Errors
///
/// Returns an error when a configured keys file fails `KeyStore::load`.
///
/// # Process exit
///
/// Exits the process with code 2 when both JSON logging and an audit log are
/// configured (they are mutually exclusive).
pub(super) async fn run_phase1(config: &RuntimeConfig) -> Result<Phase1Out> {
    // codex 0547 F6 (P3): read the security-related fields from `config`
    // (rather than args.*) so TOML can override them too (matches the docs
    // contract that "fields are consistent with the CLI").
    let rest_keys_file = config.rest_keys_file.clone();
    let ws_keys_file = config.ws_keys_file.clone();
    let grpc_keys_file = config.grpc_keys_file.clone();
    let audit_log = config.audit_log.clone();
    let allow_tcp_unauthenticated = config.allow_tcp_unauthenticated;

    // Step 1 — keys-file dry-run.
    //
    // v1.4.104 external reviewer P1-003 (P1) fix: keys-file parsing is moved
    // ahead of broker auth / SMS. Previously a keys file with a schema error
    // was only reported when the surface server started (7+ seconds after
    // broker auth + SMS); a configuration error should be found at startup.
    //
    // This only **pre-parses + dry-run validates**; the result is not kept
    // (the actual Arc is re-read and re-parsed by KeyStore::load when the
    // surface server starts, because SIGHUP reload takes that same path).
    // Dry-run failure → abort immediately, without entering broker auth.
    //
    // NOTE(review): these tracing events are emitted before logging is
    // initialised further down in this function. Unless the caller already
    // installed a subscriber, they are presumably dropped and only the
    // returned error is visible — confirm.
    for (label, path_opt) in [
        ("REST", &rest_keys_file),
        ("gRPC", &grpc_keys_file),
        ("WS", &ws_keys_file),
    ] {
        if let Some(path) = path_opt {
            match futu_auth::KeyStore::load(path) {
                Ok(ks) => {
                    tracing::info!(
                        surface = label,
                        path = %path.display(),
                        keys_loaded = ks.len(),
                        "v1.4.104 external report P1-003 (P1): {} keys file pre-validated OK \
                         (broker auth not yet started)",
                        label
                    );
                }
                Err(e) => {
                    tracing::error!(
                        surface = label,
                        error = %e,
                        path = %path.display(),
                        "v1.4.104 external report P1-003 (P1): {} keys file pre-validation FAILED — \
                         abort before broker auth / SMS to fail-closed early",
                        label
                    );
                    return Err(anyhow::anyhow!(
                        "v1.4.104 external report P1-003 (P1) fix: {} keys file at {} failed schema \
                         validation: {e}. abort before broker auth / SMS. fix the keys \
                         file then restart.",
                        label,
                        path.display()
                    ));
                }
            }
        }
    }
    // codex 0547 F6 (P3): merge_config was moved earlier, to the stage where
    // args is still available (~line 1006); inject_auth_failure_every /
    // merge_config are no longer captured here. dev-flags live on the
    // cfg(feature = "dev-flags") path and are captured right after args
    // parsing.

    // Step 2 — initialise logging (--log-level takes effect; the RUST_LOG
    // environment variable takes precedence).
    // The audit log guard must live until the process exits, otherwise the
    // tracing-appender background thread may lose events.
    let _audit_guard = if config.json_log {
        // v1.4.27 (BUG-7, found by a colleague in Canada during v1.4.26
        // regression testing): when `--audit-log` and `--json-log` were used
        // together, `--audit-log` used to be **silently ignored** (only a
        // warning on stderr) and an empty file was created; users would
        // wrongly conclude "no audit events happened". This is now a hard
        // failure → the user must explicitly pick one, avoiding compliance
        // incidents caused by an empty audit file.
        if audit_log.is_some() {
            eprintln!(
                "error: --audit-log and --json-log are mutually exclusive.\n\
                 - --json-log: entire stderr as JSONL (full event stream)\n\
                 - --audit-log: only target=futu_audit events as JSONL to a file\n\
                 choose one. If you need both machine-readable stderr AND a separate audit \
                 file, open an issue — today's layer composition doesn't support it."
            );
            std::process::exit(2);
        }
        futu_core::log::init_json_logging_with_level(&config.log_level);
        None
    } else {
        match futu_core::log::init_logging_with_audit(&config.log_level, audit_log.as_deref()) {
            Ok(guard) => {
                // Only announce the audit logger when a path was requested
                // and a guard was actually returned.
                if let (Some(path), Some(_)) = (audit_log.as_ref(), guard.as_ref()) {
                    tracing::info!(
                        path = %path.display(),
                        "audit JSONL logger enabled (target=futu_audit → file)"
                    );
                }
                guard
            }
            Err(e) => {
                // Audit log setup failed: warn on stderr and fall back to
                // plain logging without an audit guard.
                eprintln!("warning: failed to init audit log: {e}");
                futu_core::log::init_logging_with_level(&config.log_level);
                None
            }
        }
    };

    // Step 3 — secret-file permission migration.
    //
    // v1.4.102 codex 27 F11 (P2) fix: the startup chmod migration is moved
    // onto the unconditional path instead of living in the login branch. For
    // users upgrading from v1.4.101 and earlier,
    // `~/.futu-opend-rs/credentials-*.json` / `device-*.dat` default to 0644,
    // so other local users on a multi-user machine can read tgtgt / web_sig.
    // This fn scans ~/.futu-opend-rs/ and tightens secret files to 0600.
    //
    // **History**: when the BUG-012 fix shipped in v1.4.102 this call sat
    // inside the `if let (Some(account), Some(password))` branch — daemons
    // with no login credentials / only running the admin shell / with failed
    // credential parsing never ran the migration. Caught by the codex 27 F11
    // audit.
    //
    // Best-effort: a chmod failure warns but does not fail startup (a chmod
    // failure does not mean the credentials themselves are invalid).
    futu_backend::auth::tighten_secret_files_at_startup();

    // Step 4 — global panic hook.
    //
    // v1.4.41 (P3.1 phase two): the tracing subscriber is installed now, so
    // re-install the panic hook to route panics through tracing::error!
    // (audit log / JSON log / stderr).
    std::panic::set_hook(Box::new(|info| {
        let location = info
            .location()
            .map(|l| format!("{}:{}", l.file(), l.line()))
            .unwrap_or_else(|| "<unknown>".to_string());
        // Panic payloads are usually `&str` or `String`; anything else gets a
        // placeholder.
        let payload = info
            .payload()
            .downcast_ref::<&str>()
            .copied()
            .or_else(|| info.payload().downcast_ref::<String>().map(|s| s.as_str()))
            .unwrap_or("<non-string panic payload>");
        let thread = std::thread::current()
            .name()
            .unwrap_or("<unnamed>")
            .to_string();
        tracing::error!(
            target: "panic",
            location = %location,
            payload = %payload,
            thread = %thread,
            "PANIC caught by global hook"
        );
        eprintln!("PANIC at {location}: {payload} (thread={thread})");
        // v1.4.97 P1-D-D: also write dated crash log to disk for forensics.
        // Same as pre-tracing hook — covers panics that occur after tracing
        // subscriber is up.
        write_crash_log_file(info);
        // v1.4.97 P1-D-E: propagate any panic → process exit so systemd
        // Restart=on-failure can restart the daemon. Without this, tokio
        // task panic silently kills only the task, leaving daemon zombie
        // (REST/gRPC/WS/telnet/push tasks die one-by-one with main alive).
        //
        // Aligned with C++ NNCrashCenter `exit(NN_ExitCode_Crash)` pattern
        // (NNCrashCenter_Mac.cpp:99; per agent 9 finding).
        // `exit(101)` matches Rust panic default exit code; not used in
        // test build (cfg(not(test))) to avoid disrupting unit tests.
        #[cfg(not(test))]
        std::process::exit(101);
    }));

    // Step 5 — install the global metrics registry (so the audit::* counter
    // hooks and the REST `/metrics` endpoint share the same set of counters).
    futu_auth::metrics::install(std::sync::Arc::new(futu_auth::MetricsRegistry::default()));

    // Step 6 — shared RuntimeCounters: REST / gRPC use a single instance so
    // rate limits and daily totals are consistent across interfaces (the same
    // key placing 3 orders via REST and 3 via gRPC makes the rate window see
    // 6 orders, not 3 on each side).
    let shared_counters = std::sync::Arc::new(futu_auth::RuntimeCounters::new());

    // Step 7 — compute the listen address and log the "starting" line.
    let listen_addr = format!("{}:{}", config.ip, config.port);
    tracing::info!(addr = %listen_addr, "starting FutuOpenD Rust Gateway");

    // Step 8 — moomoo + explicit --login-region warning.
    //
    // v1.4.42 (clarification for P3.5 of the external reviewer's v1.4.40
    // report): moomoo account + explicit --login-region → WARN that the flag
    // is a no-op for moomoo accounts (it does not affect platform IP
    // selection).
    //
    // Reason: login_region is only used for the `region_no` parameter of the
    // salt URL for CN phone-number accounts. The platform IP pool is chosen
    // from conn_points by user_attribution (CN/HK/US/SG/AU/JP), not by
    // login_region (the three region codes gz/sh/hk are CN-mainland
    // partitions and do not map to the platform IP pool). The "same platform
    // IP under all three regions" observation in the external reviewer's
    // v1.4.40 report is expected behavior, not a bug.
    //
    // The "add WARN" item planned for v1.4.40 was never actually added
    // (claimed in the CHANGELOG but missing from the code); v1.4.42 adds it.
    if config.login_region_explicit && matches!(config.platform, Platform::Moomoo) {
        tracing::warn!(
            login_region = %config.login_region,
            platform = "moomoo",
            "--login-region={region} is a NO-OP for moomoo accounts — flag only \
             applies to --platform futunn + CN phone-number login. moomoo accounts \
             route via user_attribution automatically. Observed \"same platform IP \
             across gz/sh/hk\" is expected behavior. Remove --login-region to silence.",
            region = config.login_region
        );
    }

    // Step 9 — port conflict detection.
    //
    // v1.4.16: check whether the core ports are already in use before login.
    // Running several instances in parallel (e.g. futunn + moomoo at the same
    // time) is an expected scenario, but users easily forget to change the
    // ports, so futucli connects to the old instance and shows the wrong
    // account's data (colleague bug report #6).
    //
    // A successful connect is treated as "port in use" and only produces a
    // warning; startup continues either way.
    // NOTE(review): this is a blocking std connect (up to 200 ms per port)
    // inside an async fn — confirm that is acceptable at this point of
    // startup.
    {
        // The FTAPI port is always probed; the optional surfaces only when
        // their port is configured.
        let ports_to_check: Vec<(&str, u16)> = std::iter::once(("FTAPI", config.port))
            .chain(config.rest_port.map(|p| ("REST", p)))
            .chain(config.grpc_port.map(|p| ("gRPC", p)))
            .chain(config.websocket_port.map(|p| ("WebSocket", p)))
            .chain(config.telnet_port.map(|p| ("Telnet", p)))
            .collect();
        for (name, port) in &ports_to_check {
            let addr = port_probe_addr(&config.ip, *port);
            if std::net::TcpStream::connect_timeout(&addr, std::time::Duration::from_millis(200))
                .is_ok()
            {
                tracing::warn!(
                    name,
                    port,
                    "⚠️  port {port} ({name}) is already in use! \
                     Another futu-opend or other process may be running. \
                     Use --port / --rest-port / --grpc-port to avoid conflict."
                );
            }
        }
    }

    Ok(Phase1Out {
        _audit_guard,
        shared_counters,
        listen_addr,
        rest_keys_file,
        ws_keys_file,
        grpc_keys_file,
        allow_tcp_unauthenticated,
    })
}
328
329#[cfg(test)]
330mod tests;