Check synchronization of local and cloud files and directories

import os

instance_name = f"test-sqlite-sync"
!lamin load {instance_name}
!yes | lamin delete {instance_name}
from lamindb_setup import init, settings
import time
import os
# Create the instance under test; its storage location is derived from its name.
init(name=instance_name, storage=f"s3://lamindb-ci/{instance_name}")

Set everything up before starting the tests

# --- cloud side -------------------------------------------------------------
# Start from a clean slate: invalidate the filesystem cache, then remove the
# "dir_sync" directory if an earlier run left it behind.
dir_sync = settings.storage.root / "dir_sync"
dir_sync.fs.invalidate_cache()
if dir_sync.is_dir():
    dir_sync.rmdir()
assert not dir_sync.exists()

# Create two empty files in the cloud directory.
for file_name in ("file1", "file2"):
    (dir_sync / file_name).touch()
assert dir_sync.is_dir()

# --- local side -------------------------------------------------------------
# Resolve the local counterpart of the cloud directory without syncing, and
# empty it out if it already exists.
dir_sync_local = settings.storage.cloud_to_local_no_update(dir_sync)
if dir_sync_local.is_dir():
    for stale_entry in dir_sync_local.iterdir():
        stale_entry.unlink()
    dir_sync_local.rmdir()
assert not dir_sync_local.exists()
def num_files(directory) -> int:
    """Return how many regular files exist anywhere below ``directory``.

    ``directory`` is any path object that provides ``rglob`` (for example a
    ``pathlib.Path``). The search is recursive; directories themselves are
    not counted.
    """
    return sum(1 for path in directory.rglob("*") if path.is_file())

Test sync of general files and directories

# First sync: the local directory was removed during setup, so afterwards it
# must exist and hold both cloud files.
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
assert dir_sync_local.is_dir()
assert num_files(dir_sync_local) == 2
# Every local copy must carry exactly the modification time of its cloud file.
for file in ("file1", "file2"):
    local_mtime = (dir_sync_local / file).stat().st_mtime
    expected_mtime = (dir_sync / file).modified.timestamp()
    assert local_mtime == expected_mtime
# Remove one file from the local copy only; the cloud directory is not touched.
local_file = dir_sync_local / "file1"
local_file.unlink()
assert not local_file.exists()
assert num_files(dir_sync_local) == 1
# Syncing again is expected to bring the missing file back.
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
assert local_file.exists()
assert num_files(dir_sync_local) == 2
# Make each local copy look one second older than its cloud counterpart.
for file in ("file1", "file2"):
    cloud_file, local_file = dir_sync / file, dir_sync_local / file
    cloud_mtime = cloud_file.modified.timestamp()
    backdated = cloud_mtime - 1
    os.utime(local_file, times=(backdated, backdated))
    assert local_file.stat().st_mtime < cloud_mtime

# Syncing must bring the local modification times back in line with the cloud.
dir_sync_local = settings.storage.cloud_to_local(dir_sync)

for file in ("file1", "file2"):
    local_mtime = (dir_sync_local / file).stat().st_mtime
    assert local_mtime == (dir_sync / file).modified.timestamp()
# Let the local copy diverge from the cloud: drop file1 and add a nested file
# that has no cloud counterpart.
(dir_sync_local / "file1").unlink()

local_file_new = dir_sync_local / "test/file3"
local_file_new_parent = local_file_new.parent
local_file_new_parent.mkdir()
local_file_new.touch()
assert num_files(dir_sync_local) == 2  # file2 plus the new test/file3
# This sync is expected to leave the local directory as it is: still two files,
# and the local-only file is still there.
# NOTE(review): presumably because the fresh local file is newer than the cloud
# content -- confirm against cloud_to_local.
dir_sync_local = settings.storage.cloud_to_local(dir_sync)
assert num_files(dir_sync_local) == 2
assert local_file_new.exists()
# Pause before touching the cloud file so that its new timestamp ends up later
# than that of the local-only file (checked right below).
time.sleep(1)
cloud_file = dir_sync / "file1"
cloud_file.touch()  # bump the cloud-side modification time
cloud_touched_at = cloud_file.modified.timestamp()
assert cloud_touched_at > local_file_new.stat().st_mtime

dir_sync_local = settings.storage.cloud_to_local(dir_sync)

# After this sync the local directory must match the cloud again: two files,
# with the local-only file and its parent directory gone.
assert num_files(dir_sync_local) == 2
assert not local_file_new.exists()
assert not local_file_new_parent.exists()

for file in ("file1", "file2"):
    local_mtime = (dir_sync_local / file).stat().st_mtime
    assert local_mtime == (dir_sync / file).modified.timestamp()
# Clean up the fixtures of the directory tests: first the cloud directory ...
dir_sync.rmdir()

# ... then the local copy, which has to be emptied before rmdir() is called.
for file in dir_sync_local.iterdir():
    file.unlink()
dir_sync_local.rmdir()

Get the paths to the cloud and local sqlite databases.

# Path object of the instance's sqlite file (private attribute of the instance
# settings); the cells below treat it as the cloud-side database.
sqlite_file = settings.instance._sqlite_file
sqlite_file

The remote SQLite file exists upon instance init:

# Looks the attribute up again instead of reusing `sqlite_file`.
assert settings.instance._sqlite_file.exists()

Now mimic a new user who loads the instance (this runs 4s):

# Private helper of the instance settings.
# NOTE(review): presumably downloads the cloud sqlite file into the local cache -- confirm.
settings.instance._update_local_sqlite_file()

Get just the path of the local file, without triggering any update:

# Same path-only lookup that was used for the directory above: map the cloud
# path to its local counterpart.
cache_file = settings.instance.storage.cloud_to_local_no_update(sqlite_file)
cache_file

Delete the local sqlite file:

# Only the local copy is removed; `sqlite_file` itself is not touched here.
cache_file.unlink()
assert not cache_file.exists()

Update the local version of the sqlite file:

# With the local copy missing, the update has to recreate it.
settings.instance._update_local_sqlite_file()
assert cache_file.exists()

If the local sqlite database is older than the cloud one, the cloud database replaces the local sqlite database file.

# Backdate the local cache file by one second relative to the cloud database.
cloud_mtime = sqlite_file.modified.timestamp()
cloud_mtime
backdated = cloud_mtime - 1
os.utime(cache_file, times=(backdated, backdated))
assert cache_file.stat().st_mtime < sqlite_file.modified.timestamp()
# After the update, the local file must carry the cloud modification time again.
settings.instance._update_local_sqlite_file()
local_mtime = cache_file.stat().st_mtime
assert local_mtime == sqlite_file.modified.timestamp()
Hide code cell content
# Final cleanup: delete the test instance; `yes` feeds "y" to the command's stdin.
!yes | lamin delete {instance_name}