Sprites FS ↔ Shell isolation issue post checkpoint restore?

import os
import time
import sys
from sprites import SpritesClient
from dotenv import load_dotenv

# --- Configuration & Helpers ---
# Call load_dotenv() before the lookup below (presumably it populates
# os.environ from a local .env file -- confirm against python-dotenv docs).
load_dotenv()

# The API token is mandatory; abort immediately when it is missing or empty.
TOKEN = os.getenv("SPRITE_TOKEN")
if not TOKEN:
    print("Error: SPRITE_TOKEN not set.")
    sys.exit(1)

# Shared client, plus a sprite name made unique by the current epoch second.
CLIENT = SpritesClient(TOKEN)
SPRITE_NAME = "repro-isolation-{}".format(int(time.time()))

class Logger:
    """Tiny console logger that prints ANSI-colored, indented status lines.

    All methods are static and write to stdout via ``print``; the class is
    used purely as a namespace and is never instantiated in this script.
    """

    # ANSI escape sequences used to color/embolden terminal output.
    GREEN = "\033[92m"
    BLUE = "\033[94m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    BOLD = "\033[1m"
    END = "\033[0m"  # resets all attributes

    # Status glyphs, written as escapes so the source stays ASCII-only and
    # cannot be mangled by a mis-decoded copy/paste.
    CHECK_MARK = "\u2713"  # check mark
    CROSS_MARK = "\u2717"  # ballot X

    @staticmethod
    def step(msg):
        """Print a bold section header, preceded by a blank line."""
        print(f"\n{Logger.BOLD}{Logger.BLUE}=>{Logger.END} {Logger.BOLD}{msg}{Logger.END}")

    @staticmethod
    def info(msg):
        """Print an indented, uncolored informational line."""
        print(f"    {msg}")

    @staticmethod
    def success(msg):
        """Print an indented green line prefixed with a check mark."""
        print(f"    {Logger.GREEN}{Logger.CHECK_MARK} {msg}{Logger.END}")

    @staticmethod
    def error(msg):
        """Print an indented red line prefixed with a cross mark."""
        print(f"    {Logger.RED}{Logger.CROSS_MARK} {msg}{Logger.END}")

    @staticmethod
    def warning(msg):
        """Print an indented yellow line prefixed with ``!``."""
        print(f"    {Logger.YELLOW}! {msg}{Logger.END}")

def run_shell(sprite, cmd):
    """Run *cmd* through ``bash -c`` on *sprite* and return its output as text.

    The bytes returned by ``output()`` are decoded and stripped of
    surrounding whitespace. Any exception raised along the way is not
    propagated; instead the string ``"ERROR: <exception>"`` is returned, so
    callers must inspect the result to tell failures from real output.
    """
    try:
        process = sprite.command("bash", "-c", cmd)
        raw_bytes = process.output()
        text = raw_bytes.decode()
        return text.strip()
    except Exception as exc:
        return f"ERROR: {exc}"

def wait_for_ready(sprite, *, attempts=15, delay=2):
    """Poll until the sprite reports ``running`` and accepts a command.

    Each attempt looks up ``SPRITE_NAME`` through the module-level ``CLIENT``
    and, once its status is ``"running"``, runs ``true`` on *sprite* as a
    liveness probe. Note that the status lookup always uses ``SPRITE_NAME``,
    not the *sprite* argument.

    Args:
        sprite: Sprite handle used for the ``true`` liveness probe.
        attempts: Maximum number of polls (default 15, as before).
        delay: Seconds to sleep between polls (default 2, as before).

    Returns:
        True as soon as one poll succeeds, False when every attempt failed.
    """
    for attempt in range(attempts):
        try:
            if CLIENT.get_sprite(SPRITE_NAME).status == "running":
                sprite.command("true").run()
                return True
        except Exception:
            # Best-effort: failures while the sprite is still coming up are
            # retried. Only Exception is caught so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still abort the wait.
            pass
        # No point sleeping after the final attempt.
        if attempt < attempts - 1:
            time.sleep(delay)
    return False

# --- Repro Sequence ---

def _shell_sees(sprite, path):
    """Return True when *path* exists as seen from the sprite's shell.

    Uses ``test -e`` with explicit PRESENT/ABSENT markers on stdout, so the
    verdict does not depend on whether stderr is captured or on how a
    non-zero exit status is reported by ``run_shell``.

    Raises:
        RuntimeError: if the probe returns anything other than a marker
            (for example an ``"ERROR: ..."`` string from ``run_shell``).
    """
    probe = run_shell(sprite, f"test -e '{path}' && echo PRESENT || echo ABSENT")
    if probe == "PRESENT":
        return True
    if probe == "ABSENT":
        return False
    raise RuntimeError(f"Shell probe for {path} was inconclusive: {probe}")


def _shell_write(sprite, path, text):
    """Write *text* to *path* from the shell, raising if the command failed."""
    result = run_shell(sprite, f"echo '{text}' > {path}")
    if result.startswith("ERROR:"):
        # A failed write must not later be mistaken for filesystem isolation.
        raise RuntimeError(f"Shell write to {path} failed: {result}")


def main():
    """Run the checkpoint/restore filesystem-visibility repro end to end.

    Phases:
      1. Verify that API writes are visible to the shell and vice versa.
      2. Create a checkpoint commented ``baseline``.
      3. Restore that checkpoint and wait for the sprite to come back.
      4. Repeat both visibility checks and report which direction, if any,
         is broken after the restore.

    The sprite is deleted in ``finally`` once it has been created. Any
    unexpected exception is logged rather than propagated.
    """
    created = False
    try:
        Logger.step(f"Creating fresh sprite: {SPRITE_NAME}")
        CLIENT.create_sprite(SPRITE_NAME)
        # Track creation separately so cleanup still runs if a later call
        # (including CLIENT.sprite below) raises.
        created = True
        sprite = CLIENT.sprite(SPRITE_NAME)
        if not wait_for_ready(sprite):
            Logger.error("Sprite failed to start.")
            return

        # This handle is created once, before the checkpoint, and reused after
        # the restore. The reuse is kept as-is because it may be what triggers
        # the observed behavior.
        # NOTE(review): re-creating the handle after the restore would tell a
        # stale handle apart from a genuinely split filesystem.
        fs = sprite.filesystem("/")

        # --- Phase 1: Verify Initial Sync ---
        Logger.step("Phase 1: Verifying Initial Bidirectional Synchronization")

        # 1. API Write -> Shell Read
        api_initial = "/home/sprite/api_initial.txt"
        fs.path(api_initial).write_text("sync test api")
        if _shell_sees(sprite, api_initial):
            Logger.success("Initial: Shell sees API-written file.")
        else:
            Logger.error("Initial: Shell DOES NOT see API-written file.")
            return

        # 2. Shell Write -> API Read
        shell_initial = "/home/sprite/shell_initial.txt"
        _shell_write(sprite, shell_initial, "sync test shell")
        if fs.path(shell_initial).exists():
            Logger.success("Initial: API sees Shell-written file.")
        else:
            Logger.error("Initial: API DOES NOT see Shell-written file.")
            return

        # --- Phase 2: Create Checkpoint ---
        Logger.step("Phase 2: Creating Checkpoint 'baseline'")
        for msg in sprite.create_checkpoint("baseline"):
            if msg.type == "info":
                Logger.info(msg.data.strip())
        Logger.success("Checkpoint created.")

        # --- Phase 3: Restore Checkpoint ---
        Logger.step("Phase 3: Restoring Checkpoint 'baseline'")
        checkpoints = sprite.list_checkpoints()
        # Use a default so a missing checkpoint yields a readable error
        # instead of a bare StopIteration with an empty message.
        base_cp = next((cp for cp in checkpoints if cp.comment == "baseline"), None)
        if base_cp is None:
            Logger.error("Checkpoint 'baseline' not found in checkpoint list.")
            return

        for msg in sprite.restore_checkpoint(base_cp.id):
            if msg.type == "info":
                Logger.info(msg.data.strip())

        Logger.info("Waiting for recovery...")
        # Fixed grace period before polling, as in the original repro.
        time.sleep(5)
        if not wait_for_ready(sprite):
            Logger.error("Sprite failed to recover after restore.")
            return
        Logger.success("Restore complete.")

        # --- Phase 4: Proof of Isolation ---
        Logger.step("Phase 4: Proving Post-Restore Isolation")

        # 1. API Write -> Shell Read
        api_post = "/home/sprite/api_post_restore.txt"
        Logger.info(f"Writing {api_post} via API...")
        fs.path(api_post).write_text("isolated")

        api_to_shell_fail = not _shell_sees(sprite, api_post)
        if api_to_shell_fail:
            Logger.warning("Post-Restore: Shell CANNOT see API-written file (Isolated!)")
        else:
            Logger.success("Post-Restore: Shell CAN see API-written file (Synchronized)")

        # 2. Shell Write -> API Read
        shell_post = "/home/sprite/shell_post_restore.txt"
        Logger.info(f"Writing {shell_post} via Shell...")
        _shell_write(sprite, shell_post, "isolated")

        shell_to_api_fail = not fs.path(shell_post).exists()
        if shell_to_api_fail:
            Logger.warning("Post-Restore: API CANNOT see Shell-written file (Isolated!)")
        else:
            Logger.success("Post-Restore: API CAN see Shell-written file (Synchronized)")

        # --- Conclusion ---
        Logger.step("Conclusion")
        if api_to_shell_fail or shell_to_api_fail:
            print(f"\n{Logger.RED}{Logger.BOLD}REPRO SUCCESSFUL:{Logger.END} Filesystem isolation detected after restore.")
            if api_to_shell_fail:
                print("    - API -> Shell synchronization broken")
            if shell_to_api_fail:
                print("    - Shell -> API synchronization broken")
        else:
            print(f"\n{Logger.GREEN}{Logger.BOLD}REPRO FAILED:{Logger.END} Filesystem remains synchronized.")

    except Exception as e:
        Logger.error(f"Unexpected error during repro: {e}")
    finally:
        if created:
            Logger.step(f"Cleaning up: Destroying {SPRITE_NAME}")
            try:
                CLIENT.delete_sprite(SPRITE_NAME)
                Logger.success("Cleanup successful.")
            except Exception as e:
                Logger.error(f"Cleanup failed: {e}")

# Run the repro only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

I see

=> Creating fresh sprite: repro-isolation-1768993628

=> Phase 1: Verifying Initial Bidirectional Synchronization
    ✓ Initial: Shell sees API-written file.
    ✓ Initial: API sees Shell-written file.

=> Phase 2: Creating Checkpoint 'baseline'
    Creating checkpoint...
    Checkpoint created successfully
    Checkpoint Details:
    ID: v1
    Created: 2026-01-21 08:01:22
    Path: checkpoints/v1
    To restore this checkpoint:
    sprite checkpoint restore v1
    curl -X POST /checkpoints/v1/restore
    ✓ Checkpoint created.

=> Phase 3: Restoring Checkpoint 'baseline'
    Restoring from checkpoint v1...
    Container components started successfully
    Waiting for recovery...
    ✓ Restore complete.

=> Phase 4: Proving Post-Restore Isolation
    Writing /home/sprite/api_post_restore.txt via API...
    ✓ Post-Restore: Shell CAN see API-written file (Synchronized)
    Writing /home/sprite/shell_post_restore.txt via Shell...
    ! Post-Restore: API CANNOT see Shell-written file (Isolated!)

=> Conclusion

REPRO SUCCESSFUL: Filesystem isolation detected after restore.
    - Shell -> API synchronization broken

=> Cleaning up: Destroying repro-isolation-1768993628
    ✓ Cleanup successful.

I’m trying to parse through this to understand what it is demonstrating. After a read-through and some ChatGPT:

This repro shows that before checkpoint/restore, the Sprites Python SDK filesystem view (fs = sprite.filesystem("/")) and the container shell agree (API writes are visible to shell, and shell writes are visible to API).

After a checkpoint restore, that symmetry breaks: the shell can still see files written via the SDK, but the SDK can no longer see files written from inside the container. In other words, post-restore, Shell → SDK filesystem visibility is broken, suggesting the SDK filesystem handle/view becomes stale or detached across restore.

The code creates fs = sprite.filesystem("/") once (before restore) and then reuses it after restore, so this may be a stale SDK filesystem session after restore rather than “the container filesystem is truly split.”

The fs endpoints are an early preview; it’s quite possible they’re not doing the right thing after a restore. We’ll look. I wonder if they’re holding an fd to the “old” mounts.

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.