from __future__ import annotations
from pathlib import Path
import click
def _check_sphinx_installed():
    """Verify that the optional Sphinx documentation dependencies are importable.

    :raises SystemExit: With exit code 1 if jinja2 or sphinx is not installed.
    """
    try:
        import jinja2  # noqa: F401
        import sphinx  # noqa: F401
    except ImportError:
        # Emit the installation hint to stderr, one line at a time.
        for message in (
            "Error: Sphinx dependencies not installed.",
            "",
            "Install the full package:",
            ' pip install "chartbook[all]"',
            "",
            "Or use pipx for isolated installation:",
            ' pipx install "chartbook[all]"',
            ' pipx run "chartbook[all]" build',
        ):
            click.echo(message, err=True)
        raise SystemExit(1)
@click.group()
def main():
    """chartbook CLI tool for generating documentation websites."""
    # Root command group: subcommands are attached via @main.command() /
    # @main.group() below. The docstring above doubles as the CLI help text.
@main.command()
@click.argument("output_dir", type=click.Path(), default="./docs", required=False)
@click.option("--project-dir", type=click.Path(), help="Path to project directory")
@click.option(
    "--publish-dir",
    type=click.Path(),
    default="./_output/to_be_published/",
    help="Directory where files will be published",
)
@click.option(
    "--docs-build-dir",
    type=click.Path(),
    default="./_docs",
    help="Directory where documentation will be built",
)
@click.option(
    "--temp-docs-src-dir",
    type=click.Path(),
    default="./_docs_src",
    help="Directory where documentation source files are temporarily stored in two stage procedure",
)
@click.option(
    "--keep-build-dirs",
    is_flag=True,
    default=False,
    help="Keep temporary build directory after generation",
)
@click.option(
    "--force-write",
    "-f",
    is_flag=True,
    default=False,
    help="Overwrite existing output directory by deleting it first",
)
@click.option(
    "--size-threshold",
    type=float,
    default=50,
    help="File size threshold in MB above which to use memory-efficient loading (default: 50)",
)
@click.option(
    "--warn-missing",
    is_flag=True,
    default=False,
    help="Warn instead of error when source files (charts, notebooks, dataframes) are missing",
)
def build(
    output_dir,
    project_dir,
    publish_dir,
    docs_build_dir,
    temp_docs_src_dir,
    keep_build_dirs,
    force_write,
    size_threshold,
    warn_missing,
):
    """Generate HTML documentation in the specified output directory.

    :param output_dir: Directory where output will be generated.
    :type output_dir: str
    :param project_dir: Root directory of the project.
    :type project_dir: str
    :param publish_dir: Directory where files will be published.
    :type publish_dir: str
    :param docs_build_dir: Directory where documentation will be built.
    :type docs_build_dir: str
    :param temp_docs_src_dir: Temporary directory for documentation source files.
    :type temp_docs_src_dir: str
    :param keep_build_dirs: If True, keeps temporary build directory after generation.
    :type keep_build_dirs: bool
    :param force_write: If True, overwrites existing output directory.
    :type force_write: bool
    :param size_threshold: File size threshold in MB above which to use memory-efficient loading.
    :type size_threshold: float
    :param warn_missing: If True, warn instead of error when source files are missing.
    :type warn_missing: bool
    """
    # Check for Sphinx dependencies before importing anything that needs them.
    _check_sphinx_installed()
    # Import here to avoid loading Sphinx deps at module level.
    from chartbook.build_docs import generate_docs

    output_dir = Path(output_dir).resolve()
    # Prevent deleting the current working directory.
    if output_dir == Path.cwd():
        raise click.UsageError(
            "Output directory cannot be the current directory '.' to prevent accidental project deletion"
        )
    # Prompt before clobbering an existing, non-empty output directory.
    if output_dir.exists() and not force_write and any(output_dir.iterdir()):
        if not click.confirm(
            f"Directory '{output_dir}' already exists. Do you want to overwrite it?\n"
            "(add the -f/--force option to overwrite without prompting)",
            default=False,
        ):
            raise SystemExit(0)
        # User confirmed interactively; proceed as if --force was given.
        force_write = True
    # If project_dir not provided, use current directory.
    project_dir = resolve_project_dir(project_dir)
    # A chartbook.toml config file is required in the project directory.
    config_path = project_dir / "chartbook.toml"
    if not config_path.exists():
        # Fix: use click.UsageError (not ValueError) so the CLI prints a clean
        # error message instead of a traceback, consistent with the cwd check above.
        raise click.UsageError(f"Could not find chartbook.toml at {config_path}")
    # Record whether the existing directory must be removed after a successful build.
    should_remove_existing = output_dir.exists() and force_write
    generate_docs(
        output_dir=output_dir,
        project_dir=project_dir,
        publish_dir=publish_dir,
        _docs_dir=docs_build_dir,
        temp_docs_src_dir=temp_docs_src_dir,
        keep_build_dirs=keep_build_dirs,
        should_remove_existing=should_remove_existing,
        size_threshold=size_threshold,
        warn_missing=warn_missing,
    )
    click.echo(f"Successfully generated documentation in {output_dir}")
@main.command()
@click.option(
    "--publish-dir",
    type=click.Path(),
    default=None,
    help="Directory where files will be published",
)
@click.option("--project-dir", type=click.Path(), help="Path to project directory")
@click.option(
    "--verbose",
    "-v",
    is_flag=True,
    default=False,
    help="Enable verbose output",
)
def publish(publish_dir: Path | str | None, project_dir: Path | str, verbose: bool):
    """Publish the documentation to the specified output directory.

    If no publish directory is provided, a default local directory will be used.

    :param publish_dir: Directory where files will be published.
    :type publish_dir: Path or str, optional
    :param project_dir: Root directory of the project.
    :type project_dir: Path or str
    :param verbose: If True, enables verbose output.
    :type verbose: bool
    """
    # Check for Sphinx dependencies before doing any work.
    _check_sphinx_installed()
    # Deferred imports keep Sphinx dependencies off the module import path.
    from chartbook.manifest import load_manifest
    from chartbook.publish import publish_pipeline

    project_dir = resolve_project_dir(project_dir)
    manifest = load_manifest(base_dir=project_dir)
    pipeline_id = manifest["pipeline"]["id"]
    if publish_dir is None:
        # NOTE(review): the default target is anchored at the *current* working
        # directory (not project_dir) and does not append pipeline_id, unlike
        # the explicit branch below — confirm this asymmetry is intended.
        publish_dir = Path(".").resolve() / Path("./_output/to_be_published")
    else:
        publish_dir = Path(publish_dir) / pipeline_id
    # A relative publish_dir is interpreted relative to the project directory.
    if not publish_dir.is_absolute():
        publish_dir = project_dir / Path(publish_dir)
    publish_pipeline(publish_dir=publish_dir, base_dir=project_dir, verbose=verbose)
[docs]
def resolve_project_dir(project_dir: Path | None):
"""Resolve the project directory to an absolute path.
:param project_dir: The project directory path, or None to use cwd.
:type project_dir: Path, optional
:returns: The resolved absolute path to the project directory.
:rtype: Path
"""
if project_dir is None:
project_dir = Path.cwd()
else:
project_dir = Path(project_dir).resolve()
return project_dir
@main.command()
@click.option(
    "--no-samples",
    is_flag=True,
    default=False,
    help="Exclude sample values sections from the report",
)
@click.option(
    "--no-stats",
    is_flag=True,
    default=False,
    help="Exclude numeric column statistics sections from the report",
)
@click.option(
    "--output-dir",
    "-o",
    type=click.Path(),
    default=None,
    help="Directory to save the output file (default: current directory)",
)
@click.option(
    "--size-threshold",
    type=float,
    default=50,
    help="File size threshold in MB above which to use memory-efficient loading (default: 50)",
)
def create_data_glimpses(no_samples, no_stats, output_dir, size_threshold):
    """Create a data glimpses report from dodo.py tasks.

    This command parses the dodo.py file in the current directory to find all
    CSV/Parquet files and creates a comprehensive data glimpse report in Markdown format.

    :param no_samples: If True, exclude sample values sections from the report.
    :type no_samples: bool
    :param no_stats: If True, exclude numeric column statistics sections from the report.
    :type no_stats: bool
    :param output_dir: Directory to save the output file.
    :type output_dir: str, optional
    :param size_threshold: File size threshold in MB above which to use memory-efficient loading.
    :type size_threshold: float

    Example usage:
        chartbook create-data-glimpses
        chartbook create-data-glimpses --no-samples
        chartbook create-data-glimpses --no-samples --no-stats
        chartbook create-data-glimpses -o ./docs/
        chartbook create-data-glimpses --size-threshold 100
    """
    # Deferred import keeps heavy dependencies off the module import path.
    from chartbook.create_data_glimpses import main as create_data_glimpses_main

    try:
        create_data_glimpses_main(
            output_dir=output_dir,
            no_samples=no_samples,
            no_stats=no_stats,
            size_threshold=size_threshold,
        )
    except FileNotFoundError as e:
        click.echo(f"Error: {e}", err=True)
        # Fix: raise SystemExit(1) directly (equivalent to sys.exit(1)) for
        # consistency with the rest of the file; drops the duplicated local
        # `import sys` in each handler.
        raise SystemExit(1)
    except Exception as e:
        click.echo(f"Error generating data glimpses: {e}", err=True)
        raise SystemExit(1)
# =============================================================================
# ls command group - List catalog objects
# =============================================================================
def _load_catalog_for_cli(catalog_path=None):
    """Load manifest from catalog path or default settings.

    :param catalog_path: Optional path to catalog chartbook.toml.
    :type catalog_path: str or Path, optional
    :returns: Tuple of (manifest, resolved_catalog_path).
    :rtype: tuple
    :raises SystemExit: If no catalog is configured.
    """
    from chartbook.data import _resolve_catalog_path
    from chartbook.errors import CatalogNotConfiguredError
    from chartbook.manifest import load_manifest

    try:
        resolved = _resolve_catalog_path(catalog_path)
    except CatalogNotConfiguredError as e:
        # No catalog configured: report and bail out with an error code.
        for message in (
            f"Error: {e}",
            "",
            "Run 'chartbook config' to set a default catalog.",
        ):
            click.echo(message, err=True)
        raise SystemExit(1)
    return load_manifest(base_dir=resolved.parent), resolved
def _get_pipeline_name(pipeline_manifest):
"""Extract pipeline name from manifest.
:param pipeline_manifest: The pipeline manifest dict.
:type pipeline_manifest: dict
:returns: The pipeline name or 'Unknown'.
:rtype: str
"""
if "pipeline" in pipeline_manifest:
return pipeline_manifest["pipeline"].get("pipeline_name", "Unknown")
return "Unknown"
def _echo_pipeline_tree(pipeline_id, pipeline_manifest):
    """Echo one pipeline header line plus its dataframes and charts, indented."""
    pipeline_name = _get_pipeline_name(pipeline_manifest)
    click.echo(f"[pipeline] {pipeline_id}: {pipeline_name}")
    # Dataframes nested under the pipeline.
    if "dataframes" in pipeline_manifest:
        for df_id in sorted(pipeline_manifest["dataframes"].keys()):
            df_name = pipeline_manifest["dataframes"][df_id].get(
                "dataframe_name", "Unknown"
            )
            click.echo(f"  [dataframe] {pipeline_id}/{df_id}: {df_name}")
    # Charts nested under the pipeline.
    if "charts" in pipeline_manifest:
        for chart_id in sorted(pipeline_manifest["charts"].keys()):
            chart_name = pipeline_manifest["charts"][chart_id].get(
                "chart_name", "Unknown"
            )
            click.echo(f"  [chart] {pipeline_id}/{chart_id}: {chart_name}")


@main.group(invoke_without_command=True)
@click.option("--catalog", type=click.Path(), help="Path to catalog chartbook.toml")
@click.pass_context
def ls(ctx, catalog):
    """List catalog objects (pipelines, dataframes, charts).

    Without a subcommand, lists all objects in a tree format.
    Use subcommands to list specific object types.

    Examples:
        chartbook ls
        chartbook ls pipelines
        chartbook ls dataframes
        chartbook ls charts
    """
    # Share the --catalog option with subcommands via the click context.
    ctx.ensure_object(dict)
    ctx.obj["catalog"] = catalog
    if ctx.invoked_subcommand is None:
        # No subcommand: list everything in tree format.
        manifest, catalog_path = _load_catalog_for_cli(catalog)
        click.echo(f"Catalog: {catalog_path}")
        click.echo("")
        if manifest["config"]["type"] == "catalog":
            # Catalog aggregating multiple pipelines.
            for pipeline_id in sorted(manifest["pipelines"].keys()):
                _echo_pipeline_tree(pipeline_id, manifest["pipelines"][pipeline_id])
        else:
            # Single-pipeline manifest: the top-level dict holds the
            # dataframes/charts sections itself.
            _echo_pipeline_tree(manifest["pipeline"]["id"], manifest)
@ls.command("pipelines")
@click.pass_context
def ls_pipelines(ctx):
    """List all pipelines."""
    manifest, _ = _load_catalog_for_cli(ctx.obj.get("catalog"))
    # Single-pipeline manifest: exactly one entry to print.
    if manifest["config"]["type"] != "catalog":
        click.echo(f"{manifest['pipeline']['id']}: {_get_pipeline_name(manifest)}")
        return
    # Catalog: one line per pipeline, sorted by id.
    for pid in sorted(manifest["pipelines"]):
        click.echo(f"{pid}: {_get_pipeline_name(manifest['pipelines'][pid])}")
@ls.command("dataframes")
@click.pass_context
def ls_dataframes(ctx):
    """List all dataframes across pipelines."""
    manifest, _ = _load_catalog_for_cli(ctx.obj.get("catalog"))
    # Normalize both manifest shapes into (pipeline_id, pipeline_manifest) pairs.
    if manifest["config"]["type"] == "catalog":
        entries = [
            (pid, manifest["pipelines"][pid])
            for pid in sorted(manifest["pipelines"])
        ]
    else:
        entries = [(manifest["pipeline"]["id"], manifest)]
    for pid, pm in entries:
        for df_id in sorted(pm.get("dataframes", {})):
            df_name = pm["dataframes"][df_id].get("dataframe_name", "Unknown")
            click.echo(f"{pid}/{df_id}: {df_name}")
@ls.command("charts")
@click.pass_context
def ls_charts(ctx):
    """List all charts across pipelines."""
    manifest, _ = _load_catalog_for_cli(ctx.obj.get("catalog"))
    # Normalize both manifest shapes into (pipeline_id, pipeline_manifest) pairs.
    if manifest["config"]["type"] == "catalog":
        entries = [
            (pid, manifest["pipelines"][pid])
            for pid in sorted(manifest["pipelines"])
        ]
    else:
        entries = [(manifest["pipeline"]["id"], manifest)]
    for pid, pm in entries:
        for chart_id in sorted(pm.get("charts", {})):
            chart_name = pm["charts"][chart_id].get("chart_name", "Unknown")
            click.echo(f"{pid}/{chart_id}: {chart_name}")
# =============================================================================
# data command group - Data operations
# =============================================================================
@main.group()
def data():
    """Data operations (get paths, docs)."""
    # Group container only; the docstring doubles as the CLI help text.
@data.command("get-path")
@click.option("--pipeline", required=True, help="Pipeline ID")
@click.option("--dataframe", required=True, help="Dataframe ID")
@click.option("--catalog", type=click.Path(), help="Path to catalog chartbook.toml")
def data_get_path(pipeline, dataframe, catalog):
    """Get the path to a dataframe's parquet file.

    Examples:
        chartbook data get-path --pipeline yield_curve --dataframe repo_public
    """
    from chartbook.data import get_data_path
    from chartbook.errors import CatalogNotConfiguredError

    # Keep the try body minimal: only the lookup can raise these errors.
    try:
        resolved = get_data_path(pipeline, dataframe, catalog_path=catalog)
    except CatalogNotConfiguredError as e:
        click.echo(f"Error: {e}", err=True)
        click.echo("", err=True)
        click.echo("Run 'chartbook config' to set a default catalog.", err=True)
        raise SystemExit(1)
    except KeyError as e:
        click.echo(f"Error: {e}", err=True)
        raise SystemExit(1)
    click.echo(str(resolved))
@data.command("get-docs")
@click.option("--pipeline", required=True, help="Pipeline ID")
@click.option("--dataframe", required=True, help="Dataframe ID")
@click.option("--catalog", type=click.Path(), help="Path to catalog chartbook.toml")
def data_get_docs(pipeline, dataframe, catalog):
    """Print documentation content for a dataframe.

    Examples:
        chartbook data get-docs --pipeline yield_curve --dataframe repo_public
    """
    from chartbook.data import get_docs
    from chartbook.errors import CatalogNotConfiguredError

    # Keep the try body minimal: only the lookup can raise these errors.
    try:
        docs_text = get_docs(pipeline, dataframe, catalog_path=catalog)
    except CatalogNotConfiguredError as e:
        click.echo(f"Error: {e}", err=True)
        click.echo("", err=True)
        click.echo("Run 'chartbook config' to set a default catalog.", err=True)
        raise SystemExit(1)
    except KeyError as e:
        click.echo(f"Error: {e}", err=True)
        raise SystemExit(1)
    except FileNotFoundError as e:
        click.echo(f"Error: Documentation file not found: {e}", err=True)
        raise SystemExit(1)
    click.echo(docs_text)
@data.command("get-docs-path")
@click.option("--pipeline", required=True, help="Pipeline ID")
@click.option("--dataframe", required=True, help="Dataframe ID")
@click.option("--catalog", type=click.Path(), help="Path to catalog chartbook.toml")
def data_get_docs_path(pipeline, dataframe, catalog):
    """Get the path to a dataframe's documentation source.

    Examples:
        chartbook data get-docs-path --pipeline yield_curve --dataframe repo_public
    """
    from chartbook.data import get_docs_path
    from chartbook.errors import CatalogNotConfiguredError

    # Keep the try body minimal: only the lookup can raise these errors.
    try:
        resolved = get_docs_path(pipeline, dataframe, catalog_path=catalog)
    except CatalogNotConfiguredError as e:
        click.echo(f"Error: {e}", err=True)
        click.echo("", err=True)
        click.echo("Run 'chartbook config' to set a default catalog.", err=True)
        raise SystemExit(1)
    except KeyError as e:
        click.echo(f"Error: {e}", err=True)
        raise SystemExit(1)
    click.echo(str(resolved))
@main.command()
def config():
    """Configure the default catalog path for data loading.

    Sets the path to a catalog's ``chartbook.toml`` in
    ``~/.chartbook/settings.toml`` so that ``data.load()`` can find
    pipelines without an explicit ``catalog_path`` argument.
    """
    from chartbook.config import (
        get_default_catalog_path,
        set_default_catalog_path,
    )

    # Show the current setting, if any, before prompting.
    current = get_default_catalog_path()
    if current is not None:
        click.echo(f"Current catalog path: {current}")
        click.echo("")
    raw_path = click.prompt(
        "Path to catalog chartbook.toml (or its parent directory)",
        type=str,
    )
    catalog_path = Path(raw_path).expanduser()
    try:
        set_default_catalog_path(catalog_path)
    # Fix: the two handlers were byte-identical — merge them into one
    # tuple-except (same behavior, no duplication).
    except (FileNotFoundError, ValueError) as exc:
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)
    # Re-read so we echo the path exactly as it was persisted.
    resolved = get_default_catalog_path()
    click.echo(f"Catalog path set to: {resolved}")
    click.echo("")
    click.echo("You can now load data with:")
    click.echo(' from chartbook import data')
    click.echo(' df = data.load(pipeline="my_pipeline", dataframe="my_df")')
if __name__ == "__main__":
    # Allow running this module directly as a script (in addition to the
    # installed console entry point).
    main()