Scan Database Utils
ScanDatabase builder for fast query¶
Build a comprehensive, fast, queryable Parquet 'scans database'.
See the API documentation for more details.
In [6]:
Copied!
# Build the Parquet scan database for one experiment over a fixed date window.
from datetime import date

from geecs_data_utils import ScanPaths
from geecs_data_utils.scans_database.builder import ScanDatabaseBuilder

# Inputs: the database directory is <base_path>/<experiment>/scan_database_parquet
data_root = ScanPaths.paths_config.base_path
experiment = "Undulator"
output_path = data_root / experiment / "scan_database_parquet"
date_range = (
    date(2022, 1, 3),
    date(2025, 8, 5),
)

ScanDatabaseBuilder.stream_to_parquet(
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    date_range=date_range,
    buffer_size=50,
    max_scans=1_000_000,
    mode="overwrite",  # 'append' is the other mode used in this notebook
)
# Build the Parquet scan database for one experiment over a fixed date window.
from datetime import date

from geecs_data_utils import ScanPaths
from geecs_data_utils.scans_database.builder import ScanDatabaseBuilder

# Inputs: the database directory is <base_path>/<experiment>/scan_database_parquet
data_root = ScanPaths.paths_config.base_path
experiment = "Undulator"
output_path = data_root / experiment / "scan_database_parquet"
date_range = (
    date(2022, 1, 3),
    date(2025, 8, 5),
)

ScanDatabaseBuilder.stream_to_parquet(
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    date_range=date_range,
    buffer_size=50,
    max_scans=1_000_000,
    mode="overwrite",  # 'append' is the other mode used in this notebook
)
After the database is initially built, new scan entries are easily added using mode='append' and date_range=None.
In [7]:
Copied!
# Incremental update: with no explicit date_range the builder picks up from the
# last date in _update_log.json and continues through today.
ScanDatabaseBuilder.stream_to_parquet(
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    date_range=None,  # None -> auto-resume via the sidecar log
    buffer_size=50,
    mode="append",
)
# Incremental update: with no explicit date_range the builder picks up from the
# last date in _update_log.json and continues through today.
ScanDatabaseBuilder.stream_to_parquet(
    data_root=data_root,
    experiment=experiment,
    output_path=output_path,
    date_range=None,  # None -> auto-resume via the sidecar log
    buffer_size=50,
    mode="append",
)
Filter the scans database using a variety of flexible criteria
In [5]:
Copied!
# Query the Parquet scan database: narrow by date first, then stack further filters.
from datetime import date

# ScanPaths is imported here as well so this cell runs on a fresh kernel
# without first executing the (expensive) database-building cell.
from geecs_data_utils import ScanPaths
from geecs_data_utils.scans_database.database import ScanDatabase

data_root = ScanPaths.paths_config.base_path
experiment = "Undulator"
output_path = data_root / experiment / "scan_database_parquet"

# Initialize the ScanDatabase object on the directory the builder wrote to
db = ScanDatabase(output_path)

# Apply filters. The return values are discarded, so each call is relied on
# to narrow `db` itself (NOTE(review): confirm against the ScanDatabase API).

# Filter by date first, for a fast initial filter
db.date_range(date(2025, 8, 5), date(2025, 8, 8))

# Filter by scan parameter, case-insensitive: use the alias, the variable name, or any part of it
db.filter_scan_parameter_contains("shotnumber")

# Filter by ECS live dump entries: (device_like, variable_like, target value, tolerance)
db.filter_ecs_value_within("hexapod", "y", target=18.5, tol=0.5)

# Use predefined named filters, autoloaded based on the experiment name.
# These can have various date-range validities.
db.apply("PMQ_inserted")

# Convert the result to a DataFrame
df = db.to_df()
print("rows:", len(df))
df.head(5)
# Query the Parquet scan database: narrow by date first, then stack further filters.
from datetime import date

# ScanPaths is imported here as well so this cell runs on a fresh kernel
# without first executing the (expensive) database-building cell.
from geecs_data_utils import ScanPaths
from geecs_data_utils.scans_database.database import ScanDatabase

data_root = ScanPaths.paths_config.base_path
experiment = "Undulator"
output_path = data_root / experiment / "scan_database_parquet"

# Initialize the ScanDatabase object on the directory the builder wrote to
db = ScanDatabase(output_path)

# Apply filters. The return values are discarded, so each call is relied on
# to narrow `db` itself (NOTE(review): confirm against the ScanDatabase API).

# Filter by date first, for a fast initial filter
db.date_range(date(2025, 8, 5), date(2025, 8, 8))

# Filter by scan parameter, case-insensitive: use the alias, the variable name, or any part of it
db.filter_scan_parameter_contains("shotnumber")

# Filter by ECS live dump entries: (device_like, variable_like, target value, tolerance)
db.filter_ecs_value_within("hexapod", "y", target=18.5, tol=0.5)

# Use predefined named filters, autoloaded based on the experiment name.
# These can have various date-range validities.
db.apply("PMQ_inserted")

# Convert the result to a DataFrame
df = db.to_df()
print("rows:", len(df))
df.head(5)
[INFO] Loaded 1/1 filters from undulator.yml rows: 10
Out[5]:
| day | number | experiment | scalar_data_file | tdms_file | non_scalar_devices | scan_parameter | start | end | step_size | ... | scan_mode | scan_description | background | scan_metadata_raw_fields | ecs_dump | has_analysis_dir | notes | year | month | __ecs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 5 | 2 | Undulator | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | [UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte... | Shotnumber | 0 | 1 | 1 | ... | noscan | . scanning Shotnumber. None | 0 | {"Scan No": "2", "ScanStartInfo": ". scanning ... | {"experiment_name": "Undulator", "devices": [{... | 1 | <NA> | 2025 | 8 | {'experiment_name': 'Undulator', 'devices': [{... |
| 4 | 5 | 3 | Undulator | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | [UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte... | Shotnumber | 0 | 1 | 1 | ... | noscan | no scan for EMP diagnostics. scanning Shotnumb... | 0 | {"Scan No": "3", "ScanStartInfo": "no scan for... | {"experiment_name": "Undulator", "devices": [{... | 1 | <NA> | 2025 | 8 | {'experiment_name': 'Undulator', 'devices': [{... |
| 6 | 5 | 5 | Undulator | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | [UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte... | Shotnumber | 0 | 1 | 1 | ... | noscan | no scan. scanning Shotnumber. None | 0 | {"Scan No": "5", "ScanStartInfo": "no scan. sc... | {"experiment_name": "Undulator", "devices": [{... | 1 | <NA> | 2025 | 8 | {'experiment_name': 'Undulator', 'devices': [{... |
| 7 | 5 | 6 | Undulator | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | [UC_BCaveMagSpecCam1, UC_BCaveMagSpecCam1-inte... | Shotnumber | 0 | 1 | 1 | ... | noscan | no scan. scanning Shotnumber. None | 0 | {"Scan No": "6", "ScanStartInfo": "no scan. sc... | {"experiment_name": "Undulator", "devices": [{... | 1 | <NA> | 2025 | 8 | {'experiment_name': 'Undulator', 'devices': [{... |
| 8 | 5 | 7 | Undulator | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | Z:\data\Undulator\Y2025\08-Aug\25_0805\scans\S... | [UC_ALineEBeam3, U_BCaveICT, U_RTA4000, Z_Test... | Shotnumber | 0 | 1 | 1 | ... | noscan | no scan opn bhaci bam. scanning Shotnumber. None | 0 | {"Scan No": "7", "ScanStartInfo": "no scan opn... | {"experiment_name": "Undulator", "devices": [{... | 1 | <NA> | 2025 | 8 | {'experiment_name': 'Undulator', 'devices': [{... |
5 rows × 21 columns
In [ ]:
Copied!