abidlabs (HF Staff) committed on
Commit b6356fb · verified · 1 Parent(s): d698c0a

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +1 -0
  2. CHANGELOG.md +57 -0
  3. __init__.py +418 -0
  4. __pycache__/__init__.cpython-311.pyc +0 -0
  5. __pycache__/__init__.cpython-312.pyc +0 -0
  6. __pycache__/cli.cpython-311.pyc +0 -0
  7. __pycache__/cli.cpython-312.pyc +0 -0
  8. __pycache__/commit_scheduler.cpython-311.pyc +0 -0
  9. __pycache__/commit_scheduler.cpython-312.pyc +0 -0
  10. __pycache__/context_vars.cpython-311.pyc +0 -0
  11. __pycache__/context_vars.cpython-312.pyc +0 -0
  12. __pycache__/deploy.cpython-311.pyc +0 -0
  13. __pycache__/deploy.cpython-312.pyc +0 -0
  14. __pycache__/dummy_commit_scheduler.cpython-311.pyc +0 -0
  15. __pycache__/dummy_commit_scheduler.cpython-312.pyc +0 -0
  16. __pycache__/file_storage.cpython-311.pyc +0 -0
  17. __pycache__/file_storage.cpython-312.pyc +0 -0
  18. __pycache__/histogram.cpython-311.pyc +0 -0
  19. __pycache__/histogram.cpython-312.pyc +0 -0
  20. __pycache__/imports.cpython-311.pyc +0 -0
  21. __pycache__/imports.cpython-312.pyc +0 -0
  22. __pycache__/media.cpython-311.pyc +0 -0
  23. __pycache__/media.cpython-312.pyc +0 -0
  24. __pycache__/run.cpython-311.pyc +0 -0
  25. __pycache__/run.cpython-312.pyc +0 -0
  26. __pycache__/sqlite_storage.cpython-311.pyc +0 -0
  27. __pycache__/sqlite_storage.cpython-312.pyc +0 -0
  28. __pycache__/table.cpython-311.pyc +0 -0
  29. __pycache__/table.cpython-312.pyc +0 -0
  30. __pycache__/typehints.cpython-311.pyc +0 -0
  31. __pycache__/typehints.cpython-312.pyc +0 -0
  32. __pycache__/utils.cpython-311.pyc +0 -0
  33. __pycache__/utils.cpython-312.pyc +0 -0
  34. __pycache__/video_writer.cpython-311.pyc +0 -0
  35. __pycache__/video_writer.cpython-312.pyc +0 -0
  36. assets/trackio_logo_dark.png +0 -0
  37. assets/trackio_logo_light.png +0 -0
  38. assets/trackio_logo_old.png +3 -0
  39. assets/trackio_logo_type_dark.png +0 -0
  40. assets/trackio_logo_type_dark_transparent.png +0 -0
  41. assets/trackio_logo_type_light.png +0 -0
  42. assets/trackio_logo_type_light_transparent.png +0 -0
  43. cli.py +75 -0
  44. commit_scheduler.py +391 -0
  45. context_vars.py +18 -0
  46. deploy.py +311 -0
  47. dummy_commit_scheduler.py +12 -0
  48. histogram.py +68 -0
  49. imports.py +304 -0
  50. media/__init__.py +34 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/trackio_logo_old.png filter=lfs diff=lfs merge=lfs -text
CHANGELOG.md ADDED
@@ -0,0 +1,57 @@
# trackio

## 0.8.1

### Features

- [#336](https://github.com/gradio-app/trackio/pull/336) [`5f9f51d`](https://github.com/gradio-app/trackio/commit/5f9f51dac8677f240d7c42c3e3b2660a22aee138) - Support a list of `Trackio.Image` in a `trackio.Table` cell. Thanks @abidlabs!

## 0.8.0

### Features

- [#331](https://github.com/gradio-app/trackio/pull/331) [`2c02d0f`](https://github.com/gradio-app/trackio/commit/2c02d0fd0a5824160528782402bb0dd4083396d5) - Truncate table string values that are greater than 250 characters (configurable via env variable). Thanks @abidlabs!
- [#324](https://github.com/gradio-app/trackio/pull/324) [`50b2122`](https://github.com/gradio-app/trackio/commit/50b2122e7965ac82a72e6cb3b7d048bc10a2a6b1) - Add log y-axis functionality to UI. Thanks @abidlabs!
- [#326](https://github.com/gradio-app/trackio/pull/326) [`61dc1f4`](https://github.com/gradio-app/trackio/commit/61dc1f40af2f545f8e70395ddf0dbb8aee6b60d5) - Fix: improve table rendering for metrics in Trackio Dashboard. Thanks @vigneshwaran!
- [#328](https://github.com/gradio-app/trackio/pull/328) [`6857cbb`](https://github.com/gradio-app/trackio/commit/6857cbbe557a59a4642f210ec42566d108294e63) - Support trackio.Table with trackio.Image columns. Thanks @abidlabs!

## 0.7.0

### Features

- [#277](https://github.com/gradio-app/trackio/pull/277) [`db35601`](https://github.com/gradio-app/trackio/commit/db35601b9c023423c4654c9909b8ab73e58737de) - fix: make grouped runs view reflect live updates. Thanks @Saba9!
- [#320](https://github.com/gradio-app/trackio/pull/320) [`24ae739`](https://github.com/gradio-app/trackio/commit/24ae73969b09fb3126acd2f91647cdfbf8cf72a1) - Add additional query params for xmin, xmax, and smoothing. Thanks @abidlabs!
- [#270](https://github.com/gradio-app/trackio/pull/270) [`cd1dfc3`](https://github.com/gradio-app/trackio/commit/cd1dfc3dc641b4499ac6d4a1b066fa8e2b52c57b) - feature: add support for logging audio. Thanks @Saba9!

## 0.6.0

### Features

- [#309](https://github.com/gradio-app/trackio/pull/309) [`1df2353`](https://github.com/gradio-app/trackio/commit/1df23534d6c01938c8db9c0f584ffa23e8d6021d) - Add histogram support with wandb-compatible API. Thanks @abidlabs!
- [#315](https://github.com/gradio-app/trackio/pull/315) [`76ba060`](https://github.com/gradio-app/trackio/commit/76ba06055dc43ca8f03b79f3e72d761949bd19a8) - Add guards to avoid silent fails. Thanks @Xmaster6y!
- [#313](https://github.com/gradio-app/trackio/pull/313) [`a606b3e`](https://github.com/gradio-app/trackio/commit/a606b3e1c5edf3d4cf9f31bd50605226a5a1c5d0) - No longer prevent certain keys from being used. Instead, dunderify them to prevent collisions with internal usage. Thanks @abidlabs!
- [#317](https://github.com/gradio-app/trackio/pull/317) [`27370a5`](https://github.com/gradio-app/trackio/commit/27370a595d0dbdf7eebbe7159d2ba778f039da44) - quick fixes for trackio.histogram. Thanks @abidlabs!
- [#312](https://github.com/gradio-app/trackio/pull/312) [`aa0f3bf`](https://github.com/gradio-app/trackio/commit/aa0f3bf372e7a0dd592a38af699c998363830eeb) - Fix video logging by adding TRACKIO_DIR to allowed_paths. Thanks @abidlabs!

## 0.5.3

### Features

- [#300](https://github.com/gradio-app/trackio/pull/300) [`5e4cacf`](https://github.com/gradio-app/trackio/commit/5e4cacf2e7ce527b4ce60de3a5bc05d2c02c77fb) - Adds more environment variables to allow customization of Trackio dashboard. Thanks @abidlabs!

## 0.5.2

### Features

- [#293](https://github.com/gradio-app/trackio/pull/293) [`64afc28`](https://github.com/gradio-app/trackio/commit/64afc28d3ea1dfd821472dc6bf0b8ed35a9b74be) - Ensures that the TRACKIO_DIR environment variable is respected. Thanks @abidlabs!
- [#287](https://github.com/gradio-app/trackio/pull/287) [`cd3e929`](https://github.com/gradio-app/trackio/commit/cd3e9294320949e6b8b829239069a43d5d7ff4c1) - fix(sqlite): unify .sqlite extension, allow export when DBs exist, clean WAL sidecars on import. Thanks @vaibhav-research!

### Fixes

- [#291](https://github.com/gradio-app/trackio/pull/291) [`3b5adc3`](https://github.com/gradio-app/trackio/commit/3b5adc3d1f452dbab7a714d235f4974782f93730) - Fix the wheel build. Thanks @pngwn!

## 0.5.1

### Fixes

- [#278](https://github.com/gradio-app/trackio/pull/278) [`314c054`](https://github.com/gradio-app/trackio/commit/314c05438007ddfea3383e06fd19143e27468e2d) - Fix row orientation of metrics plots. Thanks @abidlabs!
__init__.py ADDED
@@ -0,0 +1,418 @@
import hashlib
import json
import logging
import os
import warnings
import webbrowser
from pathlib import Path
from typing import Any

from gradio.blocks import BUILT_IN_THEMES
from gradio.themes import Default as DefaultTheme
from gradio.themes import ThemeClass
from gradio.utils import TupleNoPrint
from gradio_client import Client
from huggingface_hub import SpaceStorage

from trackio import context_vars, deploy, utils
from trackio.deploy import sync
from trackio.histogram import Histogram
from trackio.imports import import_csv, import_tf_events
from trackio.media import TrackioAudio, TrackioImage, TrackioVideo
from trackio.run import Run
from trackio.sqlite_storage import SQLiteStorage
from trackio.table import Table
from trackio.ui.main import demo
from trackio.utils import TRACKIO_DIR, TRACKIO_LOGO_DIR

logging.getLogger("httpx").setLevel(logging.WARNING)

warnings.filterwarnings(
    "ignore",
    message="Empty session being created. Install gradio\\[oauth\\]",
    category=UserWarning,
    module="gradio.helpers",
)

__version__ = json.loads(Path(__file__).parent.joinpath("package.json").read_text())[
    "version"
]

__all__ = [
    "init",
    "log",
    "finish",
    "show",
    "sync",
    "delete_project",
    "import_csv",
    "import_tf_events",
    "Image",
    "Video",
    "Audio",
    "Table",
    "Histogram",
]

Image = TrackioImage
Video = TrackioVideo
Audio = TrackioAudio


config = {}

DEFAULT_THEME = "default"


def init(
    project: str,
    name: str | None = None,
    group: str | None = None,
    space_id: str | None = None,
    space_storage: SpaceStorage | None = None,
    dataset_id: str | None = None,
    config: dict | None = None,
    resume: str = "never",
    settings: Any = None,
    private: bool | None = None,
    embed: bool = True,
) -> Run:
    """
    Creates a new Trackio project and returns a [`Run`] object.

    Args:
        project (`str`):
            The name of the project (can be an existing project to continue tracking or
            a new project to start tracking from scratch).
        name (`str`, *optional*):
            The name of the run (if not provided, a default name will be generated).
        group (`str`, *optional*):
            The name of the group which this run belongs to, in order to help organize
            related runs together. You can toggle the entire group's visibility in the
            dashboard.
        space_id (`str`, *optional*):
            If provided, the project will be logged to a Hugging Face Space instead of
            a local directory. Should be a complete Space name like
            `"username/reponame"` or `"orgname/reponame"`, or just `"reponame"` in which
            case the Space will be created in the currently-logged-in Hugging Face
            user's namespace. If the Space does not exist, it will be created. If the
            Space already exists, the project will be logged to it.
        space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
            Choice of persistent storage tier.
        dataset_id (`str`, *optional*):
            If a `space_id` is provided, a persistent Hugging Face Dataset will be
            created and the metrics will be synced to it every 5 minutes. Specify a
            Dataset with name like `"username/datasetname"` or `"orgname/datasetname"`,
            or `"datasetname"` (uses currently-logged-in Hugging Face user's namespace),
            or `None` (uses the same name as the Space but with the `"_dataset"`
            suffix). If the Dataset does not exist, it will be created. If the Dataset
            already exists, the project will be appended to it.
        config (`dict`, *optional*):
            A dictionary of configuration options. Provided for compatibility with
            `wandb.init()`.
        resume (`str`, *optional*, defaults to `"never"`):
            Controls how to handle resuming a run. Can be one of:

            - `"must"`: Must resume the run with the given name, raises error if run
              doesn't exist
            - `"allow"`: Resume the run if it exists, otherwise create a new run
            - `"never"`: Never resume a run, always create a new one
        private (`bool`, *optional*):
            Whether to make the Space private. If None (default), the repo will be
            public unless the organization's default is private. This value is ignored
            if the repo already exists.
        settings (`Any`, *optional*):
            Not used. Provided for compatibility with `wandb.init()`.
        embed (`bool`, *optional*, defaults to `True`):
            If running inside a Jupyter/Colab notebook, whether the dashboard should
            automatically be embedded in the cell when trackio.init() is called.

    Returns:
        `Run`: A [`Run`] object that can be used to log metrics and finish the run.
    """
    if settings is not None:
        warnings.warn(
            "* Warning: settings is not used. Provided for compatibility with wandb.init(). Please create an issue at: https://github.com/gradio-app/trackio/issues if you need a specific feature implemented."
        )

    if space_id is None and dataset_id is not None:
        raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
    space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
    url = context_vars.current_server.get()
    share_url = context_vars.current_share_server.get()

    if url is None:
        if space_id is None:
            _, url, share_url = demo.launch(
                show_api=False,
                inline=False,
                quiet=True,
                prevent_thread_lock=True,
                show_error=True,
                favicon_path=TRACKIO_LOGO_DIR / "trackio_logo_light.png",
                allowed_paths=[TRACKIO_LOGO_DIR, TRACKIO_DIR],
            )
        else:
            url = space_id
            share_url = None
        context_vars.current_server.set(url)
        context_vars.current_share_server.set(share_url)
    if (
        context_vars.current_project.get() is None
        or context_vars.current_project.get() != project
    ):
        print(f"* Trackio project initialized: {project}")

        if dataset_id is not None:
            os.environ["TRACKIO_DATASET_ID"] = dataset_id
            print(
                f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
            )
        if space_id is None:
            print(f"* Trackio metrics logged to: {TRACKIO_DIR}")
            if utils.is_in_notebook() and embed:
                base_url = share_url + "/" if share_url else url
                full_url = utils.get_full_url(
                    base_url, project=project, write_token=demo.write_token
                )
                utils.embed_url_in_notebook(full_url)
            else:
                utils.print_dashboard_instructions(project)
        else:
            deploy.create_space_if_not_exists(
                space_id, space_storage, dataset_id, private
            )
            user_name, space_name = space_id.split("/")
            space_url = deploy.SPACE_HOST_URL.format(
                user_name=user_name, space_name=space_name
            )
            print(f"* View dashboard by going to: {space_url}")
            if utils.is_in_notebook() and embed:
                utils.embed_url_in_notebook(space_url)
    context_vars.current_project.set(project)

    client = None
    if not space_id:
        client = Client(url, verbose=False)

    if resume == "must":
        if name is None:
            raise ValueError("Must provide a run name when resume='must'")
        if name not in SQLiteStorage.get_runs(project):
            raise ValueError(f"Run '{name}' does not exist in project '{project}'")
        resumed = True
    elif resume == "allow":
        resumed = name is not None and name in SQLiteStorage.get_runs(project)
    elif resume == "never":
        if name is not None and name in SQLiteStorage.get_runs(project):
            warnings.warn(
                f"* Warning: resume='never' but a run '{name}' already exists in "
                f"project '{project}'. Generating a new name instead. If you want "
                "to resume this run, call init() with resume='must' or resume='allow'."
            )
            name = None
        resumed = False
    else:
        raise ValueError("resume must be one of: 'must', 'allow', or 'never'")

    run = Run(
        url=url,
        project=project,
        client=client,
        name=name,
        group=group,
        config=config,
        space_id=space_id,
    )

    if resumed:
        print(f"* Resumed existing run: {run.name}")
    else:
        print(f"* Created new run: {run.name}")

    context_vars.current_run.set(run)
    globals()["config"] = run.config
    return run


def log(metrics: dict, step: int | None = None) -> None:
    """
    Logs metrics to the current run.

    Args:
        metrics (`dict`):
            A dictionary of metrics to log.
        step (`int`, *optional*):
            The step number. If not provided, the step will be incremented
            automatically.
    """
    run = context_vars.current_run.get()
    if run is None:
        raise RuntimeError("Call trackio.init() before trackio.log().")
    run.log(
        metrics=metrics,
        step=step,
    )


def finish():
    """
    Finishes the current run.
    """
    run = context_vars.current_run.get()
    if run is None:
        raise RuntimeError("Call trackio.init() before trackio.finish().")
    run.finish()


def delete_project(project: str, force: bool = False) -> bool:
    """
    Deletes a project by removing its local SQLite database.

    Args:
        project (`str`):
            The name of the project to delete.
        force (`bool`, *optional*, defaults to `False`):
            If `True`, deletes the project without prompting for confirmation.
            If `False`, prompts the user to confirm before deleting.

    Returns:
        `bool`: `True` if the project was deleted, `False` otherwise.
    """
    db_path = SQLiteStorage.get_project_db_path(project)

    if not db_path.exists():
        print(f"* Project '{project}' does not exist.")
        return False

    if not force:
        response = input(
            f"Are you sure you want to delete project '{project}'? "
            "This will permanently delete all runs and metrics. (y/N): "
        )
        if response.lower() not in ["y", "yes"]:
            print("* Deletion cancelled.")
            return False

    try:
        db_path.unlink()

        for suffix in ("-wal", "-shm"):
            sidecar = Path(str(db_path) + suffix)
            if sidecar.exists():
                sidecar.unlink()

        print(f"* Project '{project}' has been deleted.")
        return True
    except Exception as e:
        print(f"* Error deleting project '{project}': {e}")
        return False


def show(
    project: str | None = None,
    theme: str | ThemeClass | None = None,
    mcp_server: bool | None = None,
    color_palette: list[str] | None = None,
    *,
    open_browser: bool = True,
    block_thread: bool | None = None,
):
    """
    Launches the Trackio dashboard.

    Args:
        project (`str`, *optional*):
            The name of the project whose runs to show. If not provided, all projects
            will be shown and the user can select one.
        theme (`str` or `ThemeClass`, *optional*):
            A Gradio Theme to use for the dashboard instead of the default Gradio theme,
            can be a built-in theme (e.g. `'soft'`, `'citrus'`), a theme from the Hub
            (e.g. `"gstaff/xkcd"`), or a custom Theme class. If not provided, the
            `TRACKIO_THEME` environment variable will be used, or if that is not set, the
            default Gradio theme will be used.
        mcp_server (`bool`, *optional*):
            If `True`, the Trackio dashboard will be set up as an MCP server and certain
            functions will be added as MCP tools. If `None` (default behavior), then the
            `GRADIO_MCP_SERVER` environment variable will be used to determine if the
            MCP server should be enabled (which is `"True"` on Hugging Face Spaces).
        color_palette (`list[str]`, *optional*):
            A list of hex color codes to use for plot lines. If not provided, the
            `TRACKIO_COLOR_PALETTE` environment variable will be used (comma-separated
            hex codes), or if that is not set, the default color palette will be used.
            Example: `['#FF0000', '#00FF00', '#0000FF']`
        open_browser (`bool`, *optional*, defaults to `True`):
            If `True` and not in a notebook, a new browser tab will be opened with the dashboard.
            If `False`, the browser will not be opened.
        block_thread (`bool`, *optional*):
            If `True`, the main thread will be blocked until the dashboard is closed.
            If `None` (default behavior), then the main thread will not be blocked if the
            dashboard is launched in a notebook, otherwise the main thread will be blocked.

    Returns:
        `app`: The Gradio app object corresponding to the dashboard launched by Trackio.
        `url`: The local URL of the dashboard.
        `share_url`: The public share URL of the dashboard.
        `full_url`: The full URL of the dashboard including the write token (will use the public share URL if launched publicly, otherwise the local URL).
    """
    if color_palette is not None:
        os.environ["TRACKIO_COLOR_PALETTE"] = ",".join(color_palette)

    theme = theme or os.environ.get("TRACKIO_THEME", DEFAULT_THEME)

    if theme != DEFAULT_THEME:
        # TODO: It's a little hacky to reproduce this theme-setting logic from Gradio Blocks,
        # but in Gradio 6.0, the theme will be set in `launch()` instead, which means that we
        # will be able to remove this code.
        if isinstance(theme, str):
            if theme.lower() in BUILT_IN_THEMES:
                theme = BUILT_IN_THEMES[theme.lower()]
            else:
                try:
                    theme = ThemeClass.from_hub(theme)
                except Exception as e:
                    warnings.warn(f"Cannot load {theme}. Caught Exception: {str(e)}")
                    theme = DefaultTheme()
        if not isinstance(theme, ThemeClass):
            warnings.warn("Theme should be a class loaded from gradio.themes")
            theme = DefaultTheme()
        demo.theme: ThemeClass = theme
        demo.theme_css = theme._get_theme_css()
        demo.stylesheets = theme._stylesheets
        theme_hasher = hashlib.sha256()
        theme_hasher.update(demo.theme_css.encode("utf-8"))
        demo.theme_hash = theme_hasher.hexdigest()

    _mcp_server = (
        mcp_server
        if mcp_server is not None
        else os.environ.get("GRADIO_MCP_SERVER", "False") == "True"
    )

    app, url, share_url = demo.launch(
        show_api=_mcp_server,
        quiet=True,
        inline=False,
        prevent_thread_lock=True,
        favicon_path=TRACKIO_LOGO_DIR / "trackio_logo_light.png",
        allowed_paths=[TRACKIO_LOGO_DIR, TRACKIO_DIR],
        mcp_server=_mcp_server,
    )

    base_url = share_url + "/" if share_url else url
    full_url = utils.get_full_url(
        base_url, project=project, write_token=demo.write_token
    )

    if not utils.is_in_notebook():
        print(f"* Trackio UI launched at: {full_url}")
        if open_browser:
            webbrowser.open(full_url)
        block_thread = block_thread if block_thread is not None else True
    else:
        utils.embed_url_in_notebook(full_url)
        block_thread = block_thread if block_thread is not None else False

    if block_thread:
        utils.block_main_thread_until_keyboard_interrupt()
    return TupleNoPrint((demo, url, share_url, full_url))
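To make the API above concrete, here is a minimal usage sketch based solely on the signatures and docstrings in this file; the project, config, and metric names are illustrative, and running it would launch the local dashboard as described in `init()`:

```python
import trackio

# Start (or continue) a project; a run name is generated when not given.
run = trackio.init(project="demo-project", config={"lr": 1e-3})

# Log a few metrics; `step` is inferred when omitted.
for step in range(3):
    trackio.log({"loss": 1.0 / (step + 1)}, step=step)

trackio.finish()

# Later, resume the same run by name (raises if it does not exist).
run = trackio.init(project="demo-project", name=run.name, resume="must")
```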
__pycache__/__init__.cpython-311.pyc ADDED
Binary file (19.6 kB)
__pycache__/__init__.cpython-312.pyc ADDED
Binary file (18.3 kB)
__pycache__/cli.cpython-311.pyc ADDED
Binary file (3.78 kB)
__pycache__/cli.cpython-312.pyc ADDED
Binary file (3.34 kB)
__pycache__/commit_scheduler.cpython-311.pyc ADDED
Binary file (20.2 kB)
__pycache__/commit_scheduler.cpython-312.pyc ADDED
Binary file (18.8 kB)
__pycache__/context_vars.cpython-311.pyc ADDED
Binary file (1 kB)
__pycache__/context_vars.cpython-312.pyc ADDED
Binary file (921 Bytes)
__pycache__/deploy.cpython-311.pyc ADDED
Binary file (13.9 kB)
__pycache__/deploy.cpython-312.pyc ADDED
Binary file (12.7 kB)
__pycache__/dummy_commit_scheduler.cpython-311.pyc ADDED
Binary file (1.19 kB)
__pycache__/dummy_commit_scheduler.cpython-312.pyc ADDED
Binary file (1.01 kB)
__pycache__/file_storage.cpython-311.pyc ADDED
Binary file (1.88 kB)
__pycache__/file_storage.cpython-312.pyc ADDED
Binary file (1.63 kB)
__pycache__/histogram.cpython-311.pyc ADDED
Binary file (3.15 kB)
__pycache__/histogram.cpython-312.pyc ADDED
Binary file (2.97 kB)
__pycache__/imports.cpython-311.pyc ADDED
Binary file (14.4 kB)
__pycache__/imports.cpython-312.pyc ADDED
Binary file (13.3 kB)
__pycache__/media.cpython-311.pyc ADDED
Binary file (16 kB)
__pycache__/media.cpython-312.pyc ADDED
Binary file (15 kB)
__pycache__/run.cpython-311.pyc ADDED
Binary file (11.8 kB)
__pycache__/run.cpython-312.pyc ADDED
Binary file (10.7 kB)
__pycache__/sqlite_storage.cpython-311.pyc ADDED
Binary file (37.5 kB)
__pycache__/sqlite_storage.cpython-312.pyc ADDED
Binary file (31.7 kB)
__pycache__/table.cpython-311.pyc ADDED
Binary file (9.92 kB)
__pycache__/table.cpython-312.pyc ADDED
Binary file (8.52 kB)
__pycache__/typehints.cpython-311.pyc ADDED
Binary file (1.12 kB)
__pycache__/typehints.cpython-312.pyc ADDED
Binary file (908 Bytes)
__pycache__/utils.cpython-311.pyc ADDED
Binary file (32.5 kB)
__pycache__/utils.cpython-312.pyc ADDED
Binary file (28.4 kB)
__pycache__/video_writer.cpython-311.pyc ADDED
Binary file (5.72 kB)
__pycache__/video_writer.cpython-312.pyc ADDED
Binary file (5.33 kB)
assets/trackio_logo_dark.png ADDED
assets/trackio_logo_light.png ADDED
assets/trackio_logo_old.png ADDED

Git LFS Details

  • SHA256: 3922c4d1e465270ad4d8abb12023f3beed5d9f7f338528a4c0ac21dcf358a1c8
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
assets/trackio_logo_type_dark.png ADDED
assets/trackio_logo_type_dark_transparent.png ADDED
assets/trackio_logo_type_light.png ADDED
assets/trackio_logo_type_light_transparent.png ADDED
cli.py ADDED
@@ -0,0 +1,75 @@
import argparse

from trackio import show, sync


def main():
    parser = argparse.ArgumentParser(description="Trackio CLI")
    subparsers = parser.add_subparsers(dest="command")

    ui_parser = subparsers.add_parser(
        "show", help="Show the Trackio dashboard UI for a project"
    )
    ui_parser.add_argument(
        "--project", required=False, help="Project name to show in the dashboard"
    )
    ui_parser.add_argument(
        "--theme",
        required=False,
        default="default",
        help="A Gradio Theme to use for the dashboard instead of the default, can be a built-in theme (e.g. 'soft', 'citrus'), or a theme from the Hub (e.g. 'gstaff/xkcd').",
    )
    ui_parser.add_argument(
        "--mcp-server",
        action="store_true",
        help="Enable MCP server functionality. The Trackio dashboard will be set up as an MCP server and certain functions will be exposed as MCP tools.",
    )
    ui_parser.add_argument(
        "--color-palette",
        required=False,
        help="Comma-separated list of hex color codes for plot lines (e.g. '#FF0000,#00FF00,#0000FF'). If not provided, the TRACKIO_COLOR_PALETTE environment variable will be used, or the default palette if not set.",
    )

    sync_parser = subparsers.add_parser(
        "sync",
        help="Sync a local project's database to a Hugging Face Space. If the Space does not exist, it will be created.",
    )
    sync_parser.add_argument(
        "--project", required=True, help="The name of the local project."
    )
    sync_parser.add_argument(
        "--space-id",
        required=True,
        help="The Hugging Face Space ID where the project will be synced (e.g. username/space_id).",
    )
    sync_parser.add_argument(
        "--private",
        action="store_true",
        help="Make the Hugging Face Space private if creating a new Space. By default, the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.",
    )
    sync_parser.add_argument(
        "--force",
        action="store_true",
        help="Overwrite the existing database without prompting for confirmation.",
    )

    args = parser.parse_args()

    if args.command == "show":
        color_palette = None
        if args.color_palette:
            color_palette = [color.strip() for color in args.color_palette.split(",")]
        show(args.project, args.theme, args.mcp_server, color_palette)
    elif args.command == "sync":
        sync(
            project=args.project,
            space_id=args.space_id,
            private=args.private,
            force=args.force,
        )
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
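Each subcommand maps directly onto a library function imported at the top of the file, so the following Python calls are roughly equivalent to `trackio show --project my-project --theme soft` and `trackio sync --project my-project --space-id username/my-space`; the project and Space names here are placeholders:

```python
from trackio import show, sync

# Equivalent of: trackio show --project my-project --theme soft
show("my-project", "soft", False, None)

# Equivalent of: trackio sync --project my-project --space-id username/my-space
sync(project="my-project", space_id="username/my-space", private=False, force=False)
```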
commit_scheduler.py ADDED
@@ -0,0 +1,391 @@
# Originally copied from https://github.com/huggingface/huggingface_hub/blob/d0a948fc2a32ed6e557042a95ef3e4af97ec4a7c/src/huggingface_hub/_commit_scheduler.py

import atexit
import logging
import os
import time
from concurrent.futures import Future
from dataclasses import dataclass
from io import SEEK_END, SEEK_SET, BytesIO
from pathlib import Path
from threading import Lock, Thread
from typing import Callable, Dict, List, Union

from huggingface_hub.hf_api import (
    DEFAULT_IGNORE_PATTERNS,
    CommitInfo,
    CommitOperationAdd,
    HfApi,
)
from huggingface_hub.utils import filter_repo_objects

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class _FileToUpload:
    """Temporary dataclass to store info about files to upload. Not meant to be used directly."""

    local_path: Path
    path_in_repo: str
    size_limit: int
    last_modified: float


class CommitScheduler:
    """
    Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).

    The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is
    properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually
    with the `stop` method. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
    to learn more about how to use it.

    Args:
        repo_id (`str`):
            The id of the repo to commit to.
        folder_path (`str` or `Path`):
            Path to the local folder to upload regularly.
        every (`int` or `float`, *optional*):
            The number of minutes between each commit. Defaults to 5 minutes.
        path_in_repo (`str`, *optional*):
            Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
            of the repository.
        repo_type (`str`, *optional*):
            The type of the repo to commit to. Defaults to `model`.
        revision (`str`, *optional*):
            The revision of the repo to commit to. Defaults to `main`.
        private (`bool`, *optional*):
            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
        token (`str`, *optional*):
            The token to use to commit to the repo. Defaults to the token saved on the machine.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded.
        squash_history (`bool`, *optional*):
            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
            useful to avoid degraded performance on the repo when it grows too large.
        hf_api (`HfApi`, *optional*):
            The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
        on_before_commit (`Callable[[], None]`, *optional*):
            If specified, a function that will be called before the CommitScheduler lists files to create a commit.

    Example:
    ```py
    >>> from pathlib import Path
    >>> from huggingface_hub import CommitScheduler

    # Scheduler uploads every 10 minutes
    >>> csv_path = Path("watched_folder/data.csv")
    >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)

    >>> with csv_path.open("a") as f:
    ...     f.write("first line")

    # Some time later (...)
    >>> with csv_path.open("a") as f:
    ...     f.write("second line")
    ```

    Example using a context manager:
    ```py
    >>> from pathlib import Path
    >>> from huggingface_hub import CommitScheduler

    >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler:
    ...     csv_path = Path("watched_folder/data.csv")
    ...     with csv_path.open("a") as f:
    ...         f.write("first line")
    ...     (...)
    ...     with csv_path.open("a") as f:
    ...         f.write("second line")

    # Scheduler is now stopped and the last commit has been triggered
    ```
    """

    def __init__(
        self,
        *,
        repo_id: str,
        folder_path: Union[str, Path],
        every: Union[int, float] = 5,
        path_in_repo: str | None = None,
        repo_type: str | None = None,
        revision: str | None = None,
        private: bool | None = None,
        token: str | None = None,
        allow_patterns: list[str] | str | None = None,
        ignore_patterns: list[str] | str | None = None,
        squash_history: bool = False,
        hf_api: HfApi | None = None,
        on_before_commit: Callable[[], None] | None = None,
    ) -> None:
        self.api = hf_api or HfApi(token=token)
        self.on_before_commit = on_before_commit

        # Folder
        self.folder_path = Path(folder_path).expanduser().resolve()
        self.path_in_repo = path_in_repo or ""
        self.allow_patterns = allow_patterns

        if ignore_patterns is None:
            ignore_patterns = []
        elif isinstance(ignore_patterns, str):
            ignore_patterns = [ignore_patterns]
        self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS

        if self.folder_path.is_file():
            raise ValueError(
                f"'folder_path' must be a directory, not a file: '{self.folder_path}'."
            )
        self.folder_path.mkdir(parents=True, exist_ok=True)

        # Repository
        repo_url = self.api.create_repo(
            repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True
        )
        self.repo_id = repo_url.repo_id
        self.repo_type = repo_type
        self.revision = revision
        self.token = token

        self.last_uploaded: Dict[Path, float] = {}
        self.last_push_time: float | None = None

        if not every > 0:
            raise ValueError(f"'every' must be a positive number, not '{every}'.")
        self.lock = Lock()
        self.every = every
        self.squash_history = squash_history

        logger.info(
            f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes."
        )
        self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
        self._scheduler_thread.start()
        atexit.register(self._push_to_hub)

        self.__stopped = False

    def stop(self) -> None:
        """Stop the scheduler.

        A stopped scheduler cannot be restarted. Mostly for testing purposes.
        """
        self.__stopped = True

    def __enter__(self) -> "CommitScheduler":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Upload last changes before exiting
        self.trigger().result()
        self.stop()
        return

    def _run_scheduler(self) -> None:
        """Dumb thread waiting between each scheduled push to Hub."""
        while True:
            self.last_future = self.trigger()
            time.sleep(self.every * 60)
            if self.__stopped:
                break

    def trigger(self) -> Future:
        """Trigger a `push_to_hub` and return a future.

        This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
        immediately, without waiting for the next scheduled commit.
        """
        return self.api.run_as_future(self._push_to_hub)

    def _push_to_hub(self) -> CommitInfo | None:
        if self.__stopped:  # If stopped, already scheduled commits are ignored
            return None

        logger.info("(Background) scheduled commit triggered.")
        try:
            value = self.push_to_hub()
            if self.squash_history:
                logger.info("(Background) squashing repo history.")
                self.api.super_squash_history(
                    repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision
                )
            return value
        except Exception as e:
            logger.error(
                f"Error while pushing to Hub: {e}"
            )  # Depending on the setup, error might be silenced
            raise

    def push_to_hub(self) -> CommitInfo | None:
        """
        Push folder to the Hub and return the commit info.

        <Tip warning={true}>

        This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
        queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
        issues.

        </Tip>

        The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
        uploads only changed files. If no changes are found, the method returns without committing anything. If you want
        to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful
        for example to compress data together in a single file before committing. For more details and examples, check
        out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
        """
        # Check files to upload (with lock)
        with self.lock:
            if self.on_before_commit is not None:
                self.on_before_commit()

            logger.debug("Listing files to upload for scheduled commit.")

            # List files from folder (taken from `_prepare_upload_folder_additions`)
            relpath_to_abspath = {
                path.relative_to(self.folder_path).as_posix(): path
                for path in sorted(
                    self.folder_path.glob("**/*")
                )  # sorted to be deterministic
                if path.is_file()
            }
            prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""

            # Filter with pattern + filter out unchanged files + retrieve current file size
            files_to_upload: List[_FileToUpload] = []
            for relpath in filter_repo_objects(
                relpath_to_abspath.keys(),
                allow_patterns=self.allow_patterns,
                ignore_patterns=self.ignore_patterns,
            ):
                local_path = relpath_to_abspath[relpath]
                stat = local_path.stat()
                if (
                    self.last_uploaded.get(local_path) is None
                    or self.last_uploaded[local_path] != stat.st_mtime
                ):
                    files_to_upload.append(
                        _FileToUpload(
                            local_path=local_path,
                            path_in_repo=prefix + relpath,
                            size_limit=stat.st_size,
                            last_modified=stat.st_mtime,
                        )
                    )

        # Return if nothing to upload
        if len(files_to_upload) == 0:
            logger.debug("Dropping schedule commit: no changed file to upload.")
            return None

        # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size)
        logger.debug("Removing unchanged files since previous scheduled commit.")
        add_operations = [
            CommitOperationAdd(
                # TODO: Cap the file to its current size, even if the user appends data to it while a scheduled commit is happening
                # (requires an upstream fix for XET-535: `hf_xet` should support `BinaryIO` for upload)
                path_or_fileobj=file_to_upload.local_path,
                path_in_repo=file_to_upload.path_in_repo,
            )
            for file_to_upload in files_to_upload
        ]

        # Upload files (append mode expected - no need for lock)
        logger.debug("Uploading files for scheduled commit.")
        commit_info = self.api.create_commit(
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            operations=add_operations,
            commit_message="Scheduled Commit",
            revision=self.revision,
        )

        for file in files_to_upload:
            self.last_uploaded[file.local_path] = file.last_modified

        self.last_push_time = time.time()

        return commit_info


class PartialFileIO(BytesIO):
    """A file-like object that reads only the first part of a file.

    Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
    file is uploaded (i.e. the part that was available when the filesystem was first scanned).

    In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
    disturbance for the user. The object is passed to `CommitOperationAdd`.

    Only supports `read`, `tell` and `seek` methods.

    Args:
        file_path (`str` or `Path`):
            Path to the file to read.
        size_limit (`int`):
            The maximum number of bytes to read from the file. If the file is larger than this, only the first part
            will be read (and uploaded).
    """

    def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
        self._file_path = Path(file_path)
        self._file = self._file_path.open("rb")
        self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)

    def __del__(self) -> None:
        self._file.close()
        return super().__del__()

    def __repr__(self) -> str:
        return (
            f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"
        )

    def __len__(self) -> int:
        return self._size_limit

    def __getattribute__(self, name: str):
        if name.startswith("_") or name in (
            "read",
            "tell",
            "seek",
        ):  # only 3 public methods supported
            return super().__getattribute__(name)
        raise NotImplementedError(f"PartialFileIO does not support '{name}'.")

    def tell(self) -> int:
        """Return the current file position."""
        return self._file.tell()

    def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
        """Change the stream position to the given offset.

        Behavior is the same as a regular file, except that the position is capped to the size limit.
        """
        if __whence == SEEK_END:
            # SEEK_END => set from the truncated end
            __offset = len(self) + __offset
            __whence = SEEK_SET

        pos = self._file.seek(__offset, __whence)
        if pos > self._size_limit:
            return self._file.seek(self._size_limit)
        return pos

    def read(self, __size: int | None = -1) -> bytes:
        """Read at most `__size` bytes from the file.

        Behavior is the same as a regular file, except that it is capped to the size limit.
        """
        current = self._file.tell()
        if __size is None or __size < 0:
            # Read until file limit
            truncated_size = self._size_limit - current
        else:
            # Read until file limit or __size
            truncated_size = min(__size, self._size_limit - current)
        return self._file.read(truncated_size)
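The `push_to_hub` docstring invites subclassing to change the default append-only behavior. A minimal sketch of that pattern, assuming a hypothetical `compress_folder` helper that you would supply yourself:

```python
class CompressingCommitScheduler(CommitScheduler):
    def push_to_hub(self):
        # Hypothetical pre-processing step: bundle the folder's contents
        # into a single archive before the parent class scans for changes.
        compress_folder(self.folder_path)  # user-supplied helper, not part of trackio
        return super().push_to_hub()
```

Because the parent method takes `self.lock` before listing files, any pre-processing done inside an overridden `push_to_hub` (before calling `super()`) should itself be safe to run concurrently with writers, or be moved into `on_before_commit`, which runs under the lock.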
context_vars.py ADDED
@@ -0,0 +1,18 @@
import contextvars
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from trackio.run import Run

current_run: contextvars.ContextVar["Run | None"] = contextvars.ContextVar(
    "current_run", default=None
)
current_project: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_project", default=None
)
current_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_server", default=None
)
current_share_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_share_server", default=None
)
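These module-level `ContextVar`s are how `init()`, `log()`, and `finish()` in `__init__.py` share state without plain globals; a quick sketch of the get/set protocol they rely on (assuming a fresh process where `init()` has not yet run):

```python
from trackio import context_vars

# Before init() runs, every variable falls back to its default of None.
assert context_vars.current_run.get() is None

# init() calls .set(...); later calls read the value back with .get().
context_vars.current_project.set("demo-project")
print(context_vars.current_project.get())  # "demo-project"
```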
deploy.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib.metadata
2
+ import io
3
+ import os
4
+ import time
5
+ from importlib.resources import files
6
+ from pathlib import Path
7
+
8
+ import gradio
9
+ import huggingface_hub
10
+ from gradio_client import Client
11
+ from httpx import ReadTimeout
12
+ from huggingface_hub.errors import RepositoryNotFoundError
13
+ from requests import HTTPError
14
+
15
+ import trackio
16
+ from trackio.sqlite_storage import SQLiteStorage
17
+ from trackio.utils import preprocess_space_and_dataset_ids
18
+
19
+ SPACE_HOST_URL = "https://{user_name}-{space_name}.hf.space/"
20
+ SPACE_URL = "https://huggingface.co/spaces/{space_id}"
21
+
22
+
23
+ def _is_trackio_installed_from_source() -> bool:
24
+ """Check if trackio is installed from source/editable install vs PyPI."""
25
+ try:
26
+ trackio_file = trackio.__file__
27
+ if "site-packages" not in trackio_file:
28
+ return True
29
+
30
+ dist = importlib.metadata.distribution("trackio")
31
+ if dist.files:
32
+ files = list(dist.files)
33
+ has_pth = any(".pth" in str(f) for f in files)
34
+ if has_pth:
35
+ return True
36
+
37
+ return False
38
+ except (
39
+ AttributeError,
40
+ importlib.metadata.PackageNotFoundError,
41
+ importlib.metadata.MetadataError,
42
+ ValueError,
43
+ TypeError,
44
+ ):
45
+ return True
46
+
47
+
48
+ def deploy_as_space(
49
+ space_id: str,
50
+ space_storage: huggingface_hub.SpaceStorage | None = None,
51
+ dataset_id: str | None = None,
52
+ private: bool | None = None,
53
+ ):
54
+ if (
55
+ os.getenv("SYSTEM") == "spaces"
56
+ ): # in case a repo with this function is uploaded to spaces
57
+ return
58
+
59
+ trackio_path = files("trackio")
60
+
61
+ hf_api = huggingface_hub.HfApi()
62
+
63
+ try:
64
+ huggingface_hub.create_repo(
65
+ space_id,
66
+ private=private,
67
+ space_sdk="gradio",
68
+ space_storage=space_storage,
69
+ repo_type="space",
70
+ exist_ok=True,
71
+ )
72
+ except HTTPError as e:
73
+ if e.response.status_code in [401, 403]: # unauthorized or forbidden
74
+ print("Need 'write' access token to create a Spaces repo.")
75
+ huggingface_hub.login(add_to_git_credential=False)
76
+ huggingface_hub.create_repo(
77
+ space_id,
78
+ private=private,
79
+ space_sdk="gradio",
80
+ space_storage=space_storage,
81
+ repo_type="space",
82
+ exist_ok=True,
83
+ )
84
+ else:
85
+ raise ValueError(f"Failed to create Space: {e}")
86
+
87
+ with open(Path(trackio_path, "README.md"), "r") as f:
88
+ readme_content = f.read()
89
+ readme_content = readme_content.replace("{GRADIO_VERSION}", gradio.__version__)
90
+ readme_buffer = io.BytesIO(readme_content.encode("utf-8"))
91
+ hf_api.upload_file(
92
+ path_or_fileobj=readme_buffer,
93
+ path_in_repo="README.md",
94
+ repo_id=space_id,
95
+ repo_type="space",
96
+ )
97
+
98
+ # We can assume pandas, gradio, and huggingface-hub are already installed in a Gradio Space.
99
+ # Make sure necessary dependencies are installed by creating a requirements.txt.
100
+ is_source_install = _is_trackio_installed_from_source()
101
+
102
+ if is_source_install:
103
+ requirements_content = """pyarrow>=21.0
104
+ plotly>=6.0.0,<7.0.0"""
105
+ else:
106
+ requirements_content = f"""pyarrow>=21.0
107
+ trackio=={trackio.__version__}
108
+ plotly>=6.0.0,<7.0.0"""
109
+
110
+ requirements_buffer = io.BytesIO(requirements_content.encode("utf-8"))
111
+ hf_api.upload_file(
112
+ path_or_fileobj=requirements_buffer,
113
+ path_in_repo="requirements.txt",
114
+ repo_id=space_id,
115
+ repo_type="space",
116
+ )
117
+
118
+ huggingface_hub.utils.disable_progress_bars()
119
+
120
+ if is_source_install:
121
+ hf_api.upload_folder(
122
+ repo_id=space_id,
123
+ repo_type="space",
124
+ folder_path=trackio_path,
125
+ ignore_patterns=["README.md"],
126
+ )
127
+ else:
128
+ app_file_content = """import trackio
129
+ trackio.show()"""
130
+ app_file_buffer = io.BytesIO(app_file_content.encode("utf-8"))
131
+ hf_api.upload_file(
132
+ path_or_fileobj=app_file_buffer,
133
+ path_in_repo="ui/main.py",
134
+ repo_id=space_id,
135
+ repo_type="space",
136
+ )
137
+
138
+ if hf_token := huggingface_hub.utils.get_token():
139
+ huggingface_hub.add_space_secret(space_id, "HF_TOKEN", hf_token)
140
+ if dataset_id is not None:
141
+ huggingface_hub.add_space_variable(space_id, "TRACKIO_DATASET_ID", dataset_id)
142
+
143
+ if logo_light_url := os.environ.get("TRACKIO_LOGO_LIGHT_URL"):
144
+ huggingface_hub.add_space_variable(
145
+ space_id, "TRACKIO_LOGO_LIGHT_URL", logo_light_url
146
+ )
147
+ if logo_dark_url := os.environ.get("TRACKIO_LOGO_DARK_URL"):
148
+ huggingface_hub.add_space_variable(
149
+ space_id, "TRACKIO_LOGO_DARK_URL", logo_dark_url
150
+ )
151
+
152
+ if plot_order := os.environ.get("TRACKIO_PLOT_ORDER"):
153
+ huggingface_hub.add_space_variable(space_id, "TRACKIO_PLOT_ORDER", plot_order)
154
+
155
+ if theme := os.environ.get("TRACKIO_THEME"):
156
+ huggingface_hub.add_space_variable(space_id, "TRACKIO_THEME", theme)
157
+
158
+ huggingface_hub.add_space_variable(space_id, "GRADIO_MCP_SERVER", "True")
159
+
160
+
161
+ def create_space_if_not_exists(
162
+ space_id: str,
163
+ space_storage: huggingface_hub.SpaceStorage | None = None,
164
+ dataset_id: str | None = None,
165
+ private: bool | None = None,
166
+ ) -> None:
167
+ """
168
+ Creates a new Hugging Face Space if it does not exist. If a dataset_id is provided, it will be added as a space variable.
169
+
170
+ Args:
171
+ space_id: The ID of the Space to create.
172
+ dataset_id: The ID of the Dataset to add to the Space.
173
+ private: Whether to make the Space private. If None (default), the repo will be
174
+ public unless the organization's default is private. This value is ignored if
175
+ the repo already exists.
176
+ """
177
+ if "/" not in space_id:
178
+ raise ValueError(
179
+ f"Invalid space ID: {space_id}. Must be in the format: username/reponame or orgname/reponame."
180
+ )
181
+ if dataset_id is not None and "/" not in dataset_id:
182
+ raise ValueError(
183
+ f"Invalid dataset ID: {dataset_id}. Must be in the format: username/datasetname or orgname/datasetname."
184
+ )
185
+ try:
186
+ huggingface_hub.repo_info(space_id, repo_type="space")
187
+ print(f"* Found existing space: {SPACE_URL.format(space_id=space_id)}")
188
+ if dataset_id is not None:
189
+ huggingface_hub.add_space_variable(
190
+ space_id, "TRACKIO_DATASET_ID", dataset_id
191
+ )
192
+ if logo_light_url := os.environ.get("TRACKIO_LOGO_LIGHT_URL"):
193
+ huggingface_hub.add_space_variable(
194
+ space_id, "TRACKIO_LOGO_LIGHT_URL", logo_light_url
195
+ )
196
+ if logo_dark_url := os.environ.get("TRACKIO_LOGO_DARK_URL"):
197
+ huggingface_hub.add_space_variable(
198
+ space_id, "TRACKIO_LOGO_DARK_URL", logo_dark_url
199
+ )
200
+
201
+ if plot_order := os.environ.get("TRACKIO_PLOT_ORDER"):
202
+ huggingface_hub.add_space_variable(
203
+ space_id, "TRACKIO_PLOT_ORDER", plot_order
204
+ )
205
+
206
+ if theme := os.environ.get("TRACKIO_THEME"):
207
+ huggingface_hub.add_space_variable(space_id, "TRACKIO_THEME", theme)
208
+ return
209
+ except RepositoryNotFoundError:
210
+ pass
211
+ except HTTPError as e:
212
+ if e.response.status_code in [401, 403]: # unauthorized or forbidden
213
+ print("Need 'write' access token to create a Spaces repo.")
214
+ huggingface_hub.login(add_to_git_credential=False)
215
+ huggingface_hub.add_space_variable(
216
+ space_id, "TRACKIO_DATASET_ID", dataset_id
217
+ )
218
+ else:
219
+ raise ValueError(f"Failed to create Space: {e}")
220
+
221
+ print(f"* Creating new space: {SPACE_URL.format(space_id=space_id)}")
222
+ deploy_as_space(space_id, space_storage, dataset_id, private)
223
+
224
+
225
+ def wait_until_space_exists(
226
+ space_id: str,
227
+ ) -> None:
228
+ """
229
+ Blocks the current thread until the space exists.
230
+ May raise a TimeoutError if this takes quite a while.
231
+
232
+ Args:
233
+ space_id: The ID of the Space to wait for.
234
+ """
235
+ hf_api = huggingface_hub.HfApi()
236
+ delay = 1
237
+ for _ in range(10):
238
+ try:
239
+ hf_api.space_info(space_id)
240
+ return
241
+ except (huggingface_hub.utils.HfHubHTTPError, ReadTimeout):
242
+ time.sleep(delay)
243
+ delay = min(delay * 2, 30)
244
+ raise TimeoutError("Waiting for space to exist took longer than expected")
245
+
246
+
+ def upload_db_to_space(project: str, space_id: str, force: bool = False) -> None:
+     """
+     Uploads the database of a local Trackio project to a Hugging Face Space.
+
+     Args:
+         project: The name of the project to upload.
+         space_id: The ID of the Space to upload to.
+         force: If True, overwrite the existing database without prompting. If False,
+             prompt for confirmation first.
+     """
+     db_path = SQLiteStorage.get_project_db_path(project)
+     hf_api = huggingface_hub.HfApi()
+
+     client = Client(space_id, verbose=False)
+     repo_files_path = client.predict(
+         api_name="/get_db_path",
+         project=project,
+     )
+
+     if not force and hf_api.file_exists(
+         repo_id=space_id,
+         filename=repo_files_path,
+         repo_type="space",
+     ):
+         response = input(
+             f"Database for project '{project}' already exists on Space '{space_id}'. "
+             "Overwrite it? (y/N): "
+         )
+         if response.lower() not in ["y", "yes"]:
+             print("* Upload cancelled.")
+             return
+
+     hf_api.upload_file(
+         path_or_fileobj=str(db_path),
+         path_in_repo=repo_files_path,
+         repo_id=space_id,
+         repo_type="space",
+     )
+
+
+ def sync(
+     project: str, space_id: str, private: bool | None = None, force: bool = False
+ ) -> None:
+     """
+     Syncs a local Trackio project's database to a Hugging Face Space.
+     If the Space does not exist, it will be created.
+
+     Args:
+         project (`str`): The name of the project to upload.
+         space_id (`str`): The ID of the Space to upload to (e.g., `"username/space_id"`).
+         private (`bool`, *optional*):
+             Whether to make the Space private. If None (default), the repo will be
+             public unless the organization's default is private. This value is ignored
+             if the repo already exists.
+         force (`bool`, *optional*, defaults to `False`):
+             If `True`, overwrite the existing database without prompting for confirmation.
+             If `False`, prompt the user before overwriting an existing database.
+     """
+     space_id, _ = preprocess_space_and_dataset_ids(space_id, None)
+     try:
+         create_space_if_not_exists(space_id, private=private)
+         wait_until_space_exists(space_id)
+         upload_db_to_space(project, space_id, force=force)
+         print(f"Synced successfully to space: {SPACE_URL.format(space_id=space_id)}")
+     except Exception as e:
+         print(f"Failed to sync to space: {e}")
dummy_commit_scheduler.py ADDED
@@ -0,0 +1,12 @@
+ # A dummy object to fit the interface of huggingface_hub's CommitScheduler
+ class DummyCommitSchedulerLock:
+     def __enter__(self):
+         return None
+
+     def __exit__(self, exception_type, exception_value, exception_traceback):
+         pass
+
+
+ class DummyCommitScheduler:
+     def __init__(self):
+         self.lock = DummyCommitSchedulerLock()
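Because the dummy scheduler only has to expose a `lock` usable as a context manager, call sites can guard database writes the same way whether or not commits are actually scheduled. A minimal sketch, assuming the flat-layout import used elsewhere in this repo:

```python
from dummy_commit_scheduler import DummyCommitScheduler  # flat-layout import, assumed

# The dummy lock is a no-op; a real huggingface_hub.CommitScheduler's lock
# would serialize these writes against background commits.
scheduler = DummyCommitScheduler()
with scheduler.lock:
    print("writing to the local SQLite database...")  # stand-in for a real write
```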
histogram.py ADDED
@@ -0,0 +1,68 @@
+ from typing import Any
+
+ import numpy as np
+
+
+ class Histogram:
+     """
+     Histogram data type for Trackio, compatible with wandb.Histogram.
+
+     Example:
+         ```python
+         import trackio
+         import numpy as np
+
+         # Create a histogram from a sequence
+         data = np.random.randn(1000)
+         trackio.log({"distribution": trackio.Histogram(data)})
+
+         # Create a histogram from a precomputed numpy histogram
+         hist, bins = np.histogram(data, bins=30)
+         trackio.log({"distribution": trackio.Histogram(np_histogram=(hist, bins))})
+
+         # Specify a custom number of bins
+         trackio.log({"distribution": trackio.Histogram(data, num_bins=50)})
+         ```
+
+     Args:
+         sequence: Optional sequence of values to build the histogram from.
+         np_histogram: Optional precomputed numpy histogram as a (hist, bins) tuple.
+         num_bins: Number of bins for the histogram (default 64, max 512).
+     """
+
+     TYPE = "trackio.histogram"
+
+     def __init__(
+         self,
+         sequence: Any = None,
+         np_histogram: tuple | None = None,
+         num_bins: int = 64,
+     ):
+         if sequence is None and np_histogram is None:
+             raise ValueError("Must provide either sequence or np_histogram")
+
+         if sequence is not None and np_histogram is not None:
+             raise ValueError("Cannot provide both sequence and np_histogram")
+
+         num_bins = min(num_bins, 512)
+
+         if np_histogram is not None:
+             self.histogram, self.bins = np_histogram
+             self.histogram = np.asarray(self.histogram)
+             self.bins = np.asarray(self.bins)
+         else:
+             data = np.asarray(sequence).flatten()
+             data = data[np.isfinite(data)]
+             if len(data) == 0:
+                 self.histogram = np.array([])
+                 self.bins = np.array([])
+             else:
+                 self.histogram, self.bins = np.histogram(data, bins=num_bins)
+
+     def _to_dict(self) -> dict:
+         """Convert histogram to dictionary for storage."""
+         return {
+             "_type": self.TYPE,
+             "bins": self.bins.tolist(),
+             "values": self.histogram.tolist(),
+         }
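Since `_to_dict()` defines the stored representation, a tiny example makes the shape concrete: `bins` holds the bin edges (one more entry than `values`), and `values` holds the counts. A sketch, with the import path assumed from this file's location:

```python
from trackio.histogram import Histogram  # import path assumed from the file name

h = Histogram(sequence=[0.0, 0.5, 1.0, 1.5, 2.0], num_bins=2)
d = h._to_dict()
print(d["_type"])   # trackio.histogram
print(d["bins"])    # [0.0, 1.0, 2.0] -- three edges for two bins
print(d["values"])  # [2, 3] -- counts per bin
```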
imports.py ADDED
@@ -0,0 +1,304 @@
+ import os
+ from pathlib import Path
+
+ import pandas as pd
+
+ from trackio import deploy, utils
+ from trackio.sqlite_storage import SQLiteStorage
+
+
+ def import_csv(
+     csv_path: str | Path,
+     project: str,
+     name: str | None = None,
+     space_id: str | None = None,
+     dataset_id: str | None = None,
+     private: bool | None = None,
+     force: bool = False,
+ ) -> None:
+     """
+     Imports a CSV file into a Trackio project. The CSV file must contain a `"step"`
+     column, may optionally contain a `"timestamp"` column, and any other columns will
+     be treated as metrics. It must also include a header row with the column names.
+
+     TODO: call init() and return a Run object so that the user can continue to log metrics to it.
+
+     Args:
+         csv_path (`str` or `Path`):
+             The str or Path to the CSV file to import.
+         project (`str`):
+             The name of the project to import the CSV file into. Must not be an
+             existing project.
+         name (`str`, *optional*):
+             The name of the run to import the CSV file into. If not provided, a
+             default name will be generated.
+         space_id (`str`, *optional*):
+             If provided, the project will be logged to a Hugging Face Space instead of a
+             local directory. Should be a complete Space name like `"username/reponame"`
+             or `"orgname/reponame"`, or just `"reponame"` in which case the Space will
+             be created in the currently-logged-in Hugging Face user's namespace. If the
+             Space does not exist, it will be created. If the Space already exists, the
+             project will be logged to it.
+         dataset_id (`str`, *optional*):
+             If provided, a persistent Hugging Face Dataset will be created and the
+             metrics will be synced to it every 5 minutes. Should be a complete Dataset
+             name like `"username/datasetname"` or `"orgname/datasetname"`, or just
+             `"datasetname"` in which case the Dataset will be created in the
+             currently-logged-in Hugging Face user's namespace. If the Dataset does not
+             exist, it will be created. If the Dataset already exists, the project will
+             be appended to it. If not provided, the metrics will be logged to a local
+             SQLite database, unless a `space_id` is provided, in which case a Dataset
+             will be automatically created with the same name as the Space but with the
+             `"_dataset"` suffix.
+         private (`bool`, *optional*):
+             Whether to make the Space private. If None (default), the repo will be
+             public unless the organization's default is private. This value is ignored
+             if the repo already exists.
+         force (`bool`, *optional*, defaults to `False`):
+             If `True`, overwrite an existing database on the Space without prompting
+             for confirmation.
+     """
+     if SQLiteStorage.get_runs(project):
+         raise ValueError(
+             f"Project '{project}' already exists. Cannot import CSV into existing project."
+         )
+
+     csv_path = Path(csv_path)
+     if not csv_path.exists():
+         raise FileNotFoundError(f"CSV file not found: {csv_path}")
+
+     df = pd.read_csv(csv_path)
+     if df.empty:
+         raise ValueError("CSV file is empty")
+
+     column_mapping = utils.simplify_column_names(df.columns.tolist())
+     df = df.rename(columns=column_mapping)
+
+     step_column = None
+     for col in df.columns:
+         if col.lower() == "step":
+             step_column = col
+             break
+
+     if step_column is None:
+         raise ValueError("CSV file must contain a 'step' column (case-insensitive)")
+
+     if name is None:
+         name = csv_path.stem
+
+     metrics_list = []
+     steps = []
+     timestamps = []
+
+     numeric_columns = []
+     for column in df.columns:
+         if column == step_column:
+             continue
+         if column == "timestamp":
+             continue
+
+         try:
+             pd.to_numeric(df[column], errors="raise")
+             numeric_columns.append(column)
+         except (ValueError, TypeError):
+             continue
+
+     for _, row in df.iterrows():
+         metrics = {}
+         for column in numeric_columns:
+             value = row[column]
+             if bool(pd.notna(value)):
+                 metrics[column] = float(value)
+
+         if metrics:
+             metrics_list.append(metrics)
+             steps.append(int(row[step_column]))
+
+             if "timestamp" in df.columns and bool(pd.notna(row["timestamp"])):
+                 timestamps.append(str(row["timestamp"]))
+             else:
+                 timestamps.append("")
+
+     if metrics_list:
+         SQLiteStorage.bulk_log(
+             project=project,
+             run=name,
+             metrics_list=metrics_list,
+             steps=steps,
+             timestamps=timestamps,
+         )
+
+         print(
+             f"* Imported {len(metrics_list)} rows from {csv_path} into project '{project}' as run '{name}'"
+         )
+         print(f"* Metrics found: {', '.join(metrics_list[0].keys())}")
+
+     space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
+     if dataset_id is not None:
+         os.environ["TRACKIO_DATASET_ID"] = dataset_id
+         print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")
+
+     if space_id is None:
+         utils.print_dashboard_instructions(project)
+     else:
+         deploy.create_space_if_not_exists(
+             space_id=space_id, dataset_id=dataset_id, private=private
+         )
+         deploy.wait_until_space_exists(space_id=space_id)
+         deploy.upload_db_to_space(project=project, space_id=space_id, force=force)
+         print(
+             f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
+         )
+
153
+ def import_tf_events(
154
+ log_dir: str | Path,
155
+ project: str,
156
+ name: str | None = None,
157
+ space_id: str | None = None,
158
+ dataset_id: str | None = None,
159
+ private: bool | None = None,
160
+ force: bool = False,
161
+ ) -> None:
162
+ """
163
+ Imports TensorFlow Events files from a directory into a Trackio project. Each
164
+ subdirectory in the log directory will be imported as a separate run.
165
+
166
+ Args:
167
+ log_dir (`str` or `Path`):
168
+ The str or Path to the directory containing TensorFlow Events files.
169
+ project (`str`):
170
+ The name of the project to import the TensorFlow Events files into. Must not
171
+ be an existing project.
172
+ name (`str`, *optional*):
173
+ The name prefix for runs (if not provided, will use directory names). Each
174
+ subdirectory will create a separate run.
175
+ space_id (`str`, *optional*):
176
+ If provided, the project will be logged to a Hugging Face Space instead of a
177
+ local directory. Should be a complete Space name like `"username/reponame"`
178
+ or `"orgname/reponame"`, or just `"reponame"` in which case the Space will
179
+ be created in the currently-logged-in Hugging Face user's namespace. If the
180
+ Space does not exist, it will be created. If the Space already exists, the
181
+ project will be logged to it.
182
+ dataset_id (`str`, *optional*):
183
+ If provided, a persistent Hugging Face Dataset will be created and the
184
+ metrics will be synced to it every 5 minutes. Should be a complete Dataset
185
+ name like `"username/datasetname"` or `"orgname/datasetname"`, or just
186
+ `"datasetname"` in which case the Dataset will be created in the
187
+ currently-logged-in Hugging Face user's namespace. If the Dataset does not
188
+ exist, it will be created. If the Dataset already exists, the project will
189
+ be appended to it. If not provided, the metrics will be logged to a local
190
+ SQLite database, unless a `space_id` is provided, in which case a Dataset
191
+ will be automatically created with the same name as the Space but with the
192
+ `"_dataset"` suffix.
193
+ private (`bool`, *optional*):
194
+ Whether to make the Space private. If None (default), the repo will be
195
+ public unless the organization's default is private. This value is ignored
196
+ if the repo already exists.
197
+ """
198
+ try:
199
+ from tbparse import SummaryReader
200
+ except ImportError:
201
+ raise ImportError(
202
+ "The `tbparse` package is not installed but is required for `import_tf_events`. Please install trackio with the `tensorboard` extra: `pip install trackio[tensorboard]`."
203
+ )
204
+
205
+ if SQLiteStorage.get_runs(project):
206
+ raise ValueError(
207
+ f"Project '{project}' already exists. Cannot import TF events into existing project."
208
+ )
209
+
210
+ path = Path(log_dir)
211
+ if not path.exists():
212
+ raise FileNotFoundError(f"TF events directory not found: {path}")
213
+
214
+ # Use tbparse to read all tfevents files in the directory structure
215
+ reader = SummaryReader(str(path), extra_columns={"dir_name"})
216
+ df = reader.scalars
217
+
218
+ if df.empty:
219
+ raise ValueError(f"No TensorFlow events data found in {path}")
220
+
221
+ total_imported = 0
222
+ imported_runs = []
223
+
224
+ # Group by dir_name to create separate runs
225
+ for dir_name, group_df in df.groupby("dir_name"):
226
+ try:
227
+ # Determine run name based on directory name
228
+ if dir_name == "":
229
+ run_name = "main" # For files in the root directory
230
+ else:
231
+ run_name = dir_name # Use directory name
232
+
233
+ if name:
234
+ run_name = f"{name}_{run_name}"
235
+
236
+ if group_df.empty:
237
+ print(f"* Skipping directory {dir_name}: no scalar data found")
238
+ continue
239
+
240
+ metrics_list = []
241
+ steps = []
242
+ timestamps = []
243
+
244
+ for _, row in group_df.iterrows():
245
+ # Convert row values to appropriate types
246
+ tag = str(row["tag"])
247
+ value = float(row["value"])
248
+ step = int(row["step"])
249
+
250
+ metrics = {tag: value}
251
+ metrics_list.append(metrics)
252
+ steps.append(step)
253
+
254
+ # Use wall_time if present, else fallback
255
+ if "wall_time" in group_df.columns and not bool(
256
+ pd.isna(row["wall_time"])
257
+ ):
258
+ timestamps.append(str(row["wall_time"]))
259
+ else:
260
+ timestamps.append("")
261
+
262
+ if metrics_list:
263
+ SQLiteStorage.bulk_log(
264
+ project=project,
265
+ run=str(run_name),
266
+ metrics_list=metrics_list,
267
+ steps=steps,
268
+ timestamps=timestamps,
269
+ )
270
+
271
+ total_imported += len(metrics_list)
272
+ imported_runs.append(run_name)
273
+
274
+ print(
275
+ f"* Imported {len(metrics_list)} scalar events from directory '{dir_name}' as run '{run_name}'"
276
+ )
277
+ print(f"* Metrics in this run: {', '.join(set(group_df['tag']))}")
278
+
279
+ except Exception as e:
280
+ print(f"* Error processing directory {dir_name}: {e}")
281
+ continue
282
+
283
+ if not imported_runs:
284
+ raise ValueError("No valid TensorFlow events data could be imported")
285
+
286
+ print(f"* Total imported events: {total_imported}")
287
+ print(f"* Created runs: {', '.join(imported_runs)}")
288
+
289
+ space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
290
+ if dataset_id is not None:
291
+ os.environ["TRACKIO_DATASET_ID"] = dataset_id
292
+ print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")
293
+
294
+ if space_id is None:
295
+ utils.print_dashboard_instructions(project)
296
+ else:
297
+ deploy.create_space_if_not_exists(
298
+ space_id, dataset_id=dataset_id, private=private
299
+ )
300
+ deploy.wait_until_space_exists(space_id)
301
+ deploy.upload_db_to_space(project, space_id, force=force)
302
+ print(
303
+ f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
304
+ )
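The TensorBoard import looks the same at the call site. A minimal sketch, again assuming the function is re-exported at the package root (paths and names are illustrative):

```python
import trackio  # requires `pip install trackio[tensorboard]` for tbparse

# Each subdirectory of runs/ (e.g. runs/train, runs/eval) becomes its own run,
# prefixed with the given name: exp1_train, exp1_eval, ...
trackio.import_tf_events("runs/", project="tb-import-demo", name="exp1")
```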
media/__init__.py ADDED
@@ -0,0 +1,34 @@
+ """
+ Media module for Trackio.
+
+ This module contains all media-related functionality including:
+ - TrackioImage, TrackioVideo, TrackioAudio classes
+ - Video writing utilities
+ - Audio conversion utilities
+ """
+
+ try:
+     from trackio.media.audio_writer import write_audio
+     from trackio.media.file_storage import FileStorage
+     from trackio.media.media import (
+         TrackioAudio,
+         TrackioImage,
+         TrackioMedia,
+         TrackioVideo,
+     )
+     from trackio.media.video_writer import write_video
+ except ImportError:
+     from media.audio_writer import write_audio
+     from media.file_storage import FileStorage
+     from media.media import TrackioAudio, TrackioImage, TrackioMedia, TrackioVideo
+     from media.video_writer import write_video
+
+ __all__ = [
+     "TrackioMedia",
+     "TrackioImage",
+     "TrackioVideo",
+     "TrackioAudio",
+     "FileStorage",
+     "write_video",
+     "write_audio",
+ ]
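The `try`/`except ImportError` fallback lets these files resolve their imports both when trackio is installed as a package and when they run as a flat directory inside a Space. Downstream code should rely only on the names in `__all__`; a minimal sketch, assuming the installed-package layout:

```python
# Package-layout import; inside a Space, the flat `from media import ...`
# fallback above resolves the same names.
from trackio.media import TrackioImage, TrackioVideo, write_audio, write_video
```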