feat(reticulum): implement RNS restart functionality with UI updates

This commit is contained in:
Ivan
2026-04-16 19:13:32 -05:00
parent 44bf881686
commit d63255511f
8 changed files with 380 additions and 26 deletions

View File

@@ -698,7 +698,7 @@ class ReticulumMeshChat:
[[Default Interface]]
type = AutoInterface
enabled = false
enabled = true
name = Default Interface
selected_interface_mode = 1
"""
@@ -889,6 +889,152 @@ class ReticulumMeshChat:
),
)
def _force_close_listener(self, listener):
"""Aggressively close a multiprocessing.connection.Listener.
Calls Listener.close() and additionally drills through the inner
SocketListener wrapper to close the underlying socket file descriptor.
Necessary because the rpc_loop thread can retain references to the
listener that prevent the kernel from releasing abstract AF_UNIX
addresses on plain close().
"""
try:
if hasattr(listener, "close"):
with contextlib.suppress(Exception):
listener.close()
finally:
socket_type = getattr(socket, "SocketType", None)
wrappers = [listener]
listener_inner = getattr(listener, "_listener", None)
if listener_inner is not None:
wrappers.append(listener_inner)
for wrapper in list(wrappers):
inner_socket = getattr(wrapper, "_socket", None)
if inner_socket is not None:
wrappers.append(inner_socket)
plain_socket = getattr(wrapper, "socket", None)
if plain_socket is not None:
wrappers.append(plain_socket)
for obj in wrappers:
if socket_type is None or not isinstance(obj, socket_type):
continue
fileno = -1
try:
fileno = obj.fileno()
except Exception: # noqa: S110
pass
with contextlib.suppress(Exception):
obj.close()
if fileno != -1:
try:
os.close(fileno)
except OSError:
pass
for wrapper in wrappers:
with contextlib.suppress(Exception):
if hasattr(wrapper, "_socket"):
wrapper._socket = None
with contextlib.suppress(Exception):
if hasattr(wrapper, "socket"):
wrapper.socket = None
with contextlib.suppress(Exception):
if hasattr(listener, "_listener"):
listener._listener = None
def _force_close_abstract_unix_addr(self, addr) -> bool:
"""Close every socket FD in the current process bound to addr.
Returns True if any FD was closed. addr is expected to be a string
starting with a NUL byte (abstract AF_UNIX namespace).
"""
if not (isinstance(addr, str) and addr.startswith("\0")):
return False
target_bytes = addr.encode("utf-8", errors="replace")
target_no_nul = target_bytes[1:]
closed_any = False
try:
current_process = psutil.Process()
for conn in current_process.net_connections(kind="unix"):
try:
laddr = getattr(conn, "laddr", None)
fd = getattr(conn, "fd", -1)
if not laddr or fd in (-1, None):
continue
if isinstance(laddr, str):
laddr_bytes = laddr.encode("utf-8", errors="replace")
elif isinstance(laddr, bytes):
laddr_bytes = laddr
else:
continue
laddr_no_nul = (
laddr_bytes[1:]
if laddr_bytes.startswith(b"\0")
else laddr_bytes
)
if (
laddr_bytes == target_bytes
or laddr_bytes == target_no_nul
or laddr_no_nul == target_no_nul
):
try:
os.close(fd)
closed_any = True
print(
f"Force closed lingering abstract UNIX FD {fd} for {addr[1:]}",
)
except OSError as fd_err:
print(
f"Failed to close FD {fd} for {addr[1:]}: {fd_err}",
)
except Exception: # noqa: S110
pass
except Exception as e:
print(f"Error scanning process for abstract UNIX FDs: {e}")
if closed_any:
gc.collect()
time.sleep(0.2)
return closed_any
def _read_reticulum_instance_name(self):
"""Return current Reticulum instance_name from config or None."""
config_dir = self._normalize_reticulum_config_dir(
getattr(self, "reticulum_config_dir", None),
)
config_path = os.path.join(config_dir, "config")
if not os.path.isfile(config_path):
return None
cp = configparser.ConfigParser()
cp.read(config_path)
if not cp.has_section("reticulum"):
return None
return cp.get("reticulum", "instance_name", fallback=None)
def _write_reticulum_instance_name(self, instance_name):
"""Persist a Reticulum instance_name value into the config."""
config_dir = self._normalize_reticulum_config_dir(
getattr(self, "reticulum_config_dir", None),
)
config_path = os.path.join(config_dir, "config")
cp = configparser.ConfigParser()
cp.read(config_path)
if not cp.has_section("reticulum"):
cp.add_section("reticulum")
cp.set("reticulum", "instance_name", instance_name)
with open(config_path, "w", encoding="utf-8") as f:
cp.write(f)
async def reload_reticulum(self):
print("Hot reloading Reticulum stack...")
# Keep reference to old reticulum instance for cleanup
@@ -1004,9 +1150,7 @@ class ReticulumMeshChat:
except Exception as e:
print(f"Warning during aggressive interface cleanup: {e}")
# Close RPC listener if it exists on the instance
if old_reticulum:
# Reticulum uses private attributes for the listener
rpc_listener_names = [
"rpc_listener",
"_Reticulum__rpc_listener",
@@ -1020,8 +1164,7 @@ class ReticulumMeshChat:
print(
f"Forcing closure of RPC listener in {attr_name}...",
)
if hasattr(listener, "close"):
listener.close()
self._force_close_listener(listener)
setattr(old_reticulum, attr_name, None)
except Exception as e:
print(f"Warning closing RPC listener {attr_name}: {e}")
@@ -1066,8 +1209,6 @@ class ReticulumMeshChat:
if hasattr(self, "reticulum"):
del self.reticulum
# Wait another moment for sockets to definitely be released by OS
# Also give some time for the RPC listener port to settle
print("Waiting for ports to settle...")
await asyncio.sleep(4)
@@ -1118,10 +1259,10 @@ class ReticulumMeshChat:
print(f"Warning reading Reticulum config for ports: {e}")
if not rpc_addrs:
# Defaults
rpc_addrs.append((("127.0.0.1", 37429), "AF_INET"))
rpc_addrs.append((("127.0.0.1", 37428), "AF_INET"))
abstract_unix_addr_in_use_after_wait = False
for i in range(15):
all_free = True
for addr, family_str in rpc_addrs:
@@ -1158,11 +1299,29 @@ class ReticulumMeshChat:
and addr.startswith("\0")
)
if is_abstract_unix_addr:
# Another local shared-instance daemon may own this address.
# Do not block reload on abstract UNIX sockets.
print(
f"Abstract RPC addr {addr_display} is occupied by another process; continuing reload.",
)
released = False
if self._force_close_abstract_unix_addr(addr):
try:
s2 = socket.socket(
socket.AF_UNIX,
socket.SOCK_STREAM,
)
try:
s2.bind(addr)
s2.close()
print(
f"Released abstract RPC addr {addr_display} from this process.",
)
released = True
except OSError:
s2.close()
except Exception: # noqa: S110
pass
if released:
continue
all_free = False
continue
all_free = False
@@ -1282,9 +1441,16 @@ class ReticulumMeshChat:
await asyncio.sleep(1)
if not all_free:
# One last attempt with a very short sleep before failing
await asyncio.sleep(2)
# Check again one last time
for addr, family_str in rpc_addrs:
if (
family_str == "AF_UNIX"
and isinstance(addr, str)
and addr.startswith("\0")
):
with contextlib.suppress(Exception):
self._force_close_abstract_unix_addr(addr)
last_check_all_free = True
for addr, family_str in rpc_addrs:
try:
@@ -1298,8 +1464,16 @@ class ReticulumMeshChat:
s.bind(addr)
s.close()
except OSError:
last_check_all_free = False
s.close()
is_abstract = (
family == socket.AF_UNIX
and isinstance(addr, str)
and addr.startswith("\0")
)
if is_abstract:
abstract_unix_addr_in_use_after_wait = True
continue
last_check_all_free = False
break
except Exception: # noqa: S110
pass
@@ -1312,20 +1486,37 @@ class ReticulumMeshChat:
)
print("RNS ports finally free after last-second check.")
# Final GC to ensure everything is released
gc.collect()
# Clear singleton right before spinning up a fresh RNS instance.
if hasattr(RNS.Reticulum, "_Reticulum__instance"):
RNS.Reticulum._Reticulum__instance = None
# Re-setup identity (this starts background loops again)
original_instance_name = None
switched_instance_name = None
if abstract_unix_addr_in_use_after_wait:
original_instance_name = self._read_reticulum_instance_name()
base_name = original_instance_name or "default"
switched_instance_name = (
f"{base_name}-reload-{os.getpid()}-{int(time.time())}"
)
self._write_reticulum_instance_name(switched_instance_name)
print(
"Abstract UNIX RPC address remained busy. "
f"Retrying with temporary instance_name={switched_instance_name}",
)
self.running = True
await self._send_rns_reload_status(
"starting-services",
"Starting identity services again...",
)
self.setup_identity(identity_to_restore)
try:
self.setup_identity(identity_to_restore)
finally:
if switched_instance_name:
self._write_reticulum_instance_name(
original_instance_name or "default",
)
await self._send_rns_reload_status(
"done",
"RNS reload complete.",

View File

@@ -72,6 +72,15 @@
<v-icon icon="mdi-restart" size="20" class="mr-2"></v-icon>
{{ $t("common.restart_app") }}
</button>
<button
type="button"
class="secondary-chip flex-1 min-[480px]:flex-none min-h-[44px] sm:min-h-0 justify-center"
:disabled="reloadingRns"
@click="restartRns"
>
<v-icon icon="mdi-restart-alert" size="20" class="mr-2"></v-icon>
{{ reloadingRns ? $t("app.reloading_rns") : "Restart RNS" }}
</button>
<button
type="button"
class="danger-chip flex-1 min-[480px]:flex-none min-h-[44px] sm:min-h-0 justify-center"
@@ -922,6 +931,7 @@ export default {
restoreError: "",
restoreFileName: "",
restoreFile: null,
reloadingRns: false,
snapshotName: "",
snapshots: [],
snapshotsTotal: 0,
@@ -1331,6 +1341,19 @@ export default {
relaunch() {
ElectronUtils.relaunch();
},
async restartRns() {
if (this.reloadingRns) return;
try {
this.reloadingRns = true;
const response = await window.api.post("/api/v1/reticulum/reload");
ToastUtils.success(response?.data?.message || this.$t("app.reloaded_rns"));
await this.getAppInfo();
} catch (e) {
ToastUtils.error(e.response?.data?.error || this.$t("settings.failed_reload_reticulum"));
} finally {
this.reloadingRns = false;
}
},
async shutdown() {
if (
await DialogUtils.confirm(

View File

@@ -57,6 +57,15 @@
<MaterialDesignIcon icon-name="export" class="w-4 h-4" />
{{ $t("interfaces.export_all") }}
</button>
<button
type="button"
class="secondary-chip text-sm"
:disabled="reloadingRns"
@click="reloadRns"
>
<MaterialDesignIcon icon-name="restart" class="w-4 h-4" />
<span>{{ reloadingRns ? $t("app.reloading_rns") : "Restart RNS" }}</span>
</button>
</div>
</div>

View File

@@ -2058,17 +2058,20 @@
</div>
</header>
<div class="settings-section__body space-y-4">
<div class="flex flex-col gap-3">
<div class="grid grid-cols-1 gap-3">
<button
class="btn btn--secondary w-full justify-center py-3"
type="button"
class="btn-maintenance border-violet-200 dark:border-violet-900/30 text-violet-800 dark:text-violet-200 bg-violet-50 dark:bg-violet-900/10 hover:bg-violet-100 dark:hover:bg-violet-900/20 disabled:opacity-60 disabled:cursor-not-allowed disabled:hover:bg-violet-50 dark:disabled:hover:bg-violet-900/10"
:disabled="reloadingRns"
@click="reloadRns"
>
<span>{{ $t("app.reload_rns") }}</span>
<div class="flex flex-col items-start text-left">
<div class="font-bold flex items-center gap-2">
<MaterialDesignIcon icon-name="restart" class="size-4" />
{{ $t("app.reload_rns") }}
</div>
</div>
</button>
<p class="text-xs text-gray-500 dark:text-gray-400">
{{ $t("app.reload_rns_description") }}
</p>
<p
v-if="reloadRnsStatusMessage"
class="text-xs"

View File

@@ -269,6 +269,100 @@ async def test_reload_reticulum_does_not_probe_unix_fallback_by_default(
app.teardown_identity()
def test_reticulum_instance_name_helpers(mock_rns, temp_dir):
with (
patch("meshchatx.src.backend.identity_context.Database"),
patch("meshchatx.src.backend.identity_context.ConfigManager"),
patch("meshchatx.src.backend.identity_context.MessageHandler"),
patch("meshchatx.src.backend.identity_context.AnnounceManager"),
patch("meshchatx.src.backend.identity_context.ArchiverManager"),
patch("meshchatx.src.backend.identity_context.MapManager"),
patch("meshchatx.src.backend.identity_context.TelephoneManager"),
patch("meshchatx.src.backend.identity_context.VoicemailManager"),
patch("meshchatx.src.backend.identity_context.RingtoneManager"),
patch("meshchatx.src.backend.identity_context.RNCPHandler"),
patch("meshchatx.src.backend.identity_context.RNStatusHandler"),
patch("meshchatx.src.backend.identity_context.RNProbeHandler"),
patch("meshchatx.src.backend.identity_context.TranslatorHandler"),
patch("LXMF.LXMRouter"),
):
app = ReticulumMeshChat(
identity=mock_rns["id_instance"],
storage_dir=temp_dir,
reticulum_config_dir=temp_dir,
)
config_path = os.path.join(temp_dir, "config")
with open(config_path, "w", encoding="utf-8") as f:
f.write("[reticulum]\n")
f.write("instance_name = default\n")
assert app._read_reticulum_instance_name() == "default"
app._write_reticulum_instance_name("reload-test")
assert app._read_reticulum_instance_name() == "reload-test"
app.teardown_identity()
@pytest.mark.asyncio
async def test_reload_reticulum_switches_instance_name_when_unix_addr_stuck(
mock_rns,
temp_dir,
):
with (
patch("meshchatx.src.backend.identity_context.Database"),
patch("meshchatx.src.backend.identity_context.ConfigManager"),
patch("meshchatx.src.backend.identity_context.MessageHandler"),
patch("meshchatx.src.backend.identity_context.AnnounceManager"),
patch("meshchatx.src.backend.identity_context.ArchiverManager"),
patch("meshchatx.src.backend.identity_context.MapManager"),
patch("meshchatx.src.backend.identity_context.TelephoneManager"),
patch("meshchatx.src.backend.identity_context.VoicemailManager"),
patch("meshchatx.src.backend.identity_context.RingtoneManager"),
patch("meshchatx.src.backend.identity_context.RNCPHandler"),
patch("meshchatx.src.backend.identity_context.RNStatusHandler"),
patch("meshchatx.src.backend.identity_context.RNProbeHandler"),
patch("meshchatx.src.backend.identity_context.TranslatorHandler"),
patch("LXMF.LXMRouter"),
patch("asyncio.sleep", return_value=None),
patch("socket.socket") as mock_socket,
):
app = ReticulumMeshChat(
identity=mock_rns["id_instance"],
storage_dir=temp_dir,
reticulum_config_dir=temp_dir,
)
app.setup_identity = MagicMock()
old_reticulum = getattr(app, "reticulum", None)
assert old_reticulum is not None
old_reticulum.rpc_addr = "\0rns/default/rpc"
old_reticulum.rpc_type = "AF_UNIX"
def socket_factory(family, *args, **kwargs):
sock_instance = MagicMock()
if family == socket.AF_UNIX:
sock_instance.bind.side_effect = OSError(98, "Address already in use")
else:
sock_instance.bind.return_value = None
return sock_instance
mock_socket.side_effect = socket_factory
with (
patch.object(app, "_force_close_abstract_unix_addr", return_value=False),
patch.object(app, "_read_reticulum_instance_name", return_value="default"),
patch.object(app, "_write_reticulum_instance_name") as mock_write_name,
):
result = await app.reload_reticulum()
assert result is True
assert mock_write_name.call_count == 2
first_name = mock_write_name.call_args_list[0].args[0]
assert first_name.startswith("default-reload-")
assert mock_write_name.call_args_list[1].args[0] == "default"
app.teardown_identity()
@pytest.mark.asyncio
async def test_reload_reticulum_failure_recovery(mock_rns, temp_dir):
with (

View File

@@ -165,6 +165,17 @@ describe("AboutPage.vue", () => {
expect(shutdownSpy).toHaveBeenCalled();
});
it("restartRns posts reticulum reload endpoint", async () => {
const wrapper = mountAboutPage();
wrapper.vm.appInfo = { version: "1.0.0" };
await wrapper.vm.$nextTick();
axiosMock.post.mockResolvedValueOnce({ data: { message: "RNS restarted" } });
await wrapper.vm.restartRns();
expect(axiosMock.post).toHaveBeenCalledWith("/api/v1/reticulum/reload");
});
it("updates app info periodically", async () => {
axiosMock.get.mockResolvedValue({
data: {

View File

@@ -169,4 +169,26 @@ describe("InterfacesPage discovery actions", () => {
interface_discovery_blacklist: "10.0.0.8:4242",
});
});
it("reloadRns posts reticulum restart endpoint", async () => {
const wrapper = mount(InterfacesPage, {
global: {
stubs: {
RouterLink: true,
MaterialDesignIcon: true,
Toggle: true,
ImportInterfacesModal: true,
Interface: true,
},
mocks: {
$t: (key) => key,
$router: { push: vi.fn() },
},
},
});
await wrapper.vm.reloadRns();
expect(mockAxios.post).toHaveBeenCalledWith("/api/v1/reticulum/reload");
});
});

View File

@@ -539,6 +539,7 @@ describe("SettingsPage Component", () => {
await wrapper.vm.$nextTick();
expect(wrapper.text()).toContain("app.reload_rns");
expect(wrapper.text()).not.toContain("app.reload_rns_description");
});
it("enabling transport shows success toast after reload", async () => {