diff --git a/.eslintrc.json b/.eslintrc.json index 4ac446e463..9425f2ba68 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -12,7 +12,9 @@ "parserOptions": { "ecmaFeatures": { "jsx": true - } + }, + "ecmaVersion": "latest", + "sourceType": "module" }, "env": { "browser": true, @@ -24,6 +26,8 @@ } }, "rules": { + "react/jsx-uses-react": "off", + "react/react-in-jsx-scope": "off", "@typescript-eslint/naming-convention": [ "error", { diff --git a/content/docs/api-reference/artifacts_show.md b/content/docs/api-reference/artifacts_show.md index b489e2b350..915c5e766e 100644 --- a/content/docs/api-reference/artifacts_show.md +++ b/content/docs/api-reference/artifacts_show.md @@ -14,17 +14,16 @@ def artifacts_show( -If you have a DVC Studio project configured with your [remote storage -credentials], you may also use the DVC Studio [REST API] to programmatically -access artifacts. It does not require the client to have any credentials other -than the DVC Studio [client access token] and does not require DVC to be -installed. +If you have a DVC Studio project configured with your [remote +storage credentials], you may also use the DVC Studio [REST API] to programmatically +access artifacts. It does not require the client to have any credentials other than +the DVC Studio [client access token] and does not require DVC to be installed. ## Usage: -```py: +```py import dvc.api artifact = dvc.api.artifacts_show( diff --git a/content/docs/api-reference/get_url.md b/content/docs/api-reference/get_url.md index acbb310ff8..1316a8a285 100644 --- a/content/docs/api-reference/get_url.md +++ b/content/docs/api-reference/get_url.md @@ -29,13 +29,13 @@ resource_url = dvc.api.get_url( ## Description Returns the URL string of the storage location (in a [DVC remote] where a target -file or directory, specified by its `path` in a `repo` (DVC -project), is stored. +file or directory, specified by its `path` in a `repo` (DVC project), +is stored. 
-The URL is formed by reading the project's [remote configuration] and the -`dvc.yaml` or `.dvc` file where the given `path` is found (`outs` field). The -schema of the URL returned depends on the [storage type] of the `remote` (see -the [Parameters](#parameters) section). +The URL is formed by reading the project's [remote configuration] and the `dvc.yaml` +or `.dvc` file where the given `path` is found (`outs` field). The schema of the +URL returned depends on the [storage type] of the `remote` (see the +[Parameters](#parameters) section). If the target is a directory, the returned URL will end in `.dir`. Refer to [Structure of cache directory] and `dvc add` to learn more about how DVC handles @@ -79,13 +79,11 @@ appropriate library, such as [`boto3`] or [`paramiko`]. walking up from the current working directory tree). - `rev` - Git commit (any [revision](https://git-scm.com/docs/revisions) such as - a branch or tag name, commit hash, or [experiment name]). If `repo` is not a - Git repo, this option is ignored. _Default_: `None` (current working tree will - be used) + a branch or tag name, commit hash, or [experiment name]). If `repo` is not a Git + repo, this option is ignored. _Default_: `None` (current working tree will be used) -- `remote` - name of the [DVC remote] to use to form the returned URL string. - _Default_: The [default remote](/doc/command-reference/remote/default) of - `repo` is used. +- `remote` - name of the [DVC remote] to use to form the returned URL string. _Default_: + The [default remote](/doc/command-reference/remote/default) of `repo` is used. - `remote_config` - dictionary of options to pass to the DVC remote. This can be used to, for example, provide credentials to the `remote`. 
diff --git a/content/docs/api-reference/open.md b/content/docs/api-reference/open.md index 591604a291..cfe194f1e1 100644 --- a/content/docs/api-reference/open.md +++ b/content/docs/api-reference/open.md @@ -28,8 +28,8 @@ with dvc.api.open( ## Description Open a data or model file tracked in a DVC project and generate a -corresponding [file object]. The file can be tracked by DVC (as an -output) or by Git. +corresponding [file object]. The file can be tracked by DVC (as an output) +or by Git. [file object]: https://docs.python.org/3/glossary.html#term-file-object @@ -38,9 +38,9 @@ corresponding [file object]. The file can be tracked by DVC (as an The exact type of file object depends on the `mode` used. For more details, please refer to Python's [`open()`] built-in, which is used under the hood. -This function makes a direct connection to [remote storage], so the file -contents can be streamed. Your code can process the data [buffer] as it's -streamed, which optimizes memory usage. +This function makes a direct connection to [remote storage], so the file contents +can be streamed. Your code can process the data [buffer] as it's streamed, which +optimizes memory usage. [`open()`]: https://docs.python.org/3/library/functions.html#open [remote storage]: /doc/user-guide/data-management/remote-storage @@ -48,8 +48,8 @@ streamed, which optimizes memory usage. -`dvc.api.open()` may only be used as a [context manager] (using the `with` -keyword, as shown in the examples). +`dvc.api.open()` may only be used as a [context manager] (using the `with` keyword, +as shown in the examples). [context manager]: https://www.python.org/dev/peps/pep-0343/#context-managers-in-the-standard-library @@ -72,12 +72,12 @@ call – no _context manager_ involved. Neither function utilizes disc space. (the current working directory tree is walked up to find it). - `rev` - Git commit (any [revision] such as a branch or tag name, commit hash, - or [experiment name]). 
If `repo` is not a Git repo, this option is ignored. - _Default_: `None` (current working tree will be used) + or [experiment name]). If `repo` is not a Git repo, this option is ignored. _Default_: + `None` (current working tree will be used) -- `remote` - name of the [DVC remote] to look for the target data. _Default_: - The [default remote] of `repo` is used if a `remote` argument is not given. - For local projects, the cache is tried before the default remote. +- `remote` - name of the [DVC remote] to look for the target data. _Default_: The + [default remote] of `repo` is used if a `remote` argument is not given. For + local projects, the cache is tried before the default remote. - `remote_config` - dictionary of options to pass to the DVC remote. This can be used to, for example, provide credentials to the `remote`. @@ -186,9 +186,9 @@ directory tree, and look for the file contents of `clean.csv` in its local ## Example: Choose a specific remote as the data source -Sometimes we may want to choose a specific [remote storage] as source, for -example if the `repo` has no default remote set. This can be done by providing a -`remote` argument: +Sometimes we may want to choose a specific [remote storage] as source, for example +if the `repo` has no default remote set. This can be done by providing a `remote` +argument: ```py import dvc.api diff --git a/content/docs/api-reference/read.md b/content/docs/api-reference/read.md index de5e4194a2..6ad012a90f 100644 --- a/content/docs/api-reference/read.md +++ b/content/docs/api-reference/read.md @@ -63,12 +63,12 @@ Python's [`open()`] built-in, which is used under the hood. (the current working directory tree is walked up to find it). - `rev` - Git commit (any [revision] such as a branch or tag name, commit hash, - or [experiment name]). If `repo` is not a Git repo, this option is ignored. - _Default_: `None` (current working tree will be used) + or [experiment name]). If `repo` is not a Git repo, this option is ignored. 
_Default_: + `None` (current working tree will be used) -- `remote` - name of the [DVC remote] to look for the target data. _Default_: - The [default remote] of `repo` is used if a `remote` argument is not given. - For local projects, the cache is tried before the default remote. +- `remote` - name of the [DVC remote] to look for the target data. _Default_: The + [default remote] of `repo` is used if a `remote` argument is not given. For + local projects, the cache is tried before the default remote. - `remote_config` - dictionary of options to pass to the DVC remote. This can be used to, for example, provide credentials to the `remote`. diff --git a/content/docs/command-reference/artifacts/get.md b/content/docs/command-reference/artifacts/get.md index a6ca199ac3..670fd3a3c1 100644 --- a/content/docs/command-reference/artifacts/get.md +++ b/content/docs/command-reference/artifacts/get.md @@ -39,12 +39,12 @@ addressed in the form `path/to/dvc.yaml:artifact_name` or `path/to:artifact_name` (where `dvc.yaml` is omitted). `dvc artifacts get` will first try to download artifacts via the DVC Studio -[REST API]. Downloading an artifact using the Studio REST API only requires a -valid DVC Studio [client access token] and a Studio project configured with your -[remote storage credentials]. It does not require the client to have those -credentials. If you do not have a valid DVC Studio token, or the artifact is not -tracked in the model registry, DVC will fall back to its typical method to get -files (see `dvc get`). +[REST API]. Downloading an artifact using the Studio REST API only requires a valid +DVC Studio [client access token] and a Studio project configured with your [remote +storage +credentials]. It does not require the client to have those credentials. If you do +not have a valid DVC Studio token, or the artifact is not tracked in the model registry, +DVC will fall back to its typical method to get files (see `dvc get`). 
## Options diff --git a/content/docs/command-reference/checkout.md b/content/docs/command-reference/checkout.md index f7473da9d2..a1ab593167 100644 --- a/content/docs/command-reference/checkout.md +++ b/content/docs/command-reference/checkout.md @@ -45,14 +45,14 @@ after `git checkout`. See the for more details. By default, this command tries not make copies of cached files in the workspace, -using reflinks instead when supported by the file system (refer to [File link -types]). The next linking strategy default value is `copy` though, so unless -other file link types are manually configured in [`cache.type`]), files will be -copied. Keep in mind that having file copies doesn't present much of a negative -impact unless the project uses very large data (several GBs or more). But -leveraging file links is crucial with large files, for example when checking out -a 50Gb file by copying might take a few minutes whereas, with links, restoring -any file size will be almost instantaneous. +using reflinks instead when supported by the file system (refer to [File +link types]). The next linking strategy default value is `copy` though, so +unless other file link types are manually configured in [`cache.type`], files +will be copied. Keep in mind that having file copies doesn't present much of a +negative impact unless the project uses very large data (several GBs or more). +But leveraging file links is crucial with large files, for example when checking +out a 50Gb file by copying might take a few minutes whereas, with links, +restoring any file size will be almost instantaneous. [File link types]: /doc/user-guide/data-management/large-dataset-optimization#file-link-types-for-the-dvc-cache @@ -68,9 +68,9 @@ such a case, `dvc checkout` prints a warning message. It also lists the partial progress made by the checkout. There are two methods to restore a file missing from the cache, depending on the -situation. 
In some cases, the data can be pulled from [remote storage] using -`dvc pull`. In other cases, the [pipeline] must be reproduced (using -`dvc repro`) to regenerate its outputs. +situation. In some cases, the data can be pulled from [remote storage] using `dvc pull`. +In other cases, the [pipeline] must be reproduced (using `dvc repro`) to regenerate +its outputs. [remote storage]: /doc/user-guide/data-management/remote-storage [pipeline]: /doc/command-reference/dag diff --git a/content/docs/command-reference/commit.md b/content/docs/command-reference/commit.md index 573d8126e0..cb8f65db6c 100644 --- a/content/docs/command-reference/commit.md +++ b/content/docs/command-reference/commit.md @@ -50,8 +50,8 @@ Some scenarios for `dvc commit` include: versions without having to execute stage commands. - Sometimes after executing a stage, we realize that not all of its - dependencies or outputs are defined in `dvc.yaml`. It is possible to [add the - missing deps/outs] without having to re-execute stages, and `dvc commit` is + dependencies or outputs are defined in `dvc.yaml`. It is possible to [add + the missing deps/outs] without having to re-execute stages, and `dvc commit` is needed to finalize the operation (see link). - It's also possible to execute stage commands by hand (without `dvc repro`), or diff --git a/content/docs/command-reference/config.md b/content/docs/command-reference/config.md index 79b9acfc36..f41a62d709 100644 --- a/content/docs/command-reference/config.md +++ b/content/docs/command-reference/config.md @@ -17,9 +17,9 @@ positional arguments: ## Description -You can query/set/replace/unset [DVC configuration] options with this command. -It takes a config option `name` (a [config section] and a key, separated by a -dot) and its `value` (any valid alpha-numeric string generally). +You can query/set/replace/unset [DVC configuration] options with this command. 
It +takes a config option `name` (a [config section] and a key, separated by a dot) +and its `value` (any valid alpha-numeric string generally). When reading config options (no `value` is given or `--list` is used), the values are read from a combined set of values from the system, global, project, diff --git a/content/docs/command-reference/data/status.md b/content/docs/command-reference/data/status.md index 88290170c0..46df9c19de 100644 --- a/content/docs/command-reference/data/status.md +++ b/content/docs/command-reference/data/status.md @@ -80,8 +80,8 @@ DVC uncommitted changes: - _Unchanged files_ have no modifications. Only shown if the `--unchanged` flag is used. -Individual changes to files inside [tracked directories] are not shown by -default but this can be enabled with the `--granular` flag. +Individual changes to files inside [tracked directories] are not shown by default +but this can be enabled with the `--granular` flag. [committed to dvc]: /doc/command-reference/commit [tracked directories]: /doc/command-reference/add#adding-entire-directories diff --git a/content/docs/command-reference/diff.md b/content/docs/command-reference/diff.md index 105df9ac26..3352f0aa9b 100644 --- a/content/docs/command-reference/diff.md +++ b/content/docs/command-reference/diff.md @@ -123,8 +123,8 @@ $ dvc diff ### Click and expand to set up the example -Let's checkout the [2-track-data] tag, corresponding to the [Data Versioning] -_Get Started_ chapter, right after we added `data.xml` file with DVC: +Let's checkout the [2-track-data] tag, corresponding to the [Data Versioning] _Get +Started_ chapter, right after we added `data.xml` file with DVC: ```cli $ git checkout 2-track-data diff --git a/content/docs/command-reference/exp/branch.md b/content/docs/command-reference/exp/branch.md index 54136a7689..3f62aeec5e 100644 --- a/content/docs/command-reference/exp/branch.md +++ b/content/docs/command-reference/exp/branch.md @@ -15,8 +15,8 @@ positional arguments: ## 
Description -Creates a new [Git branch] containing the target `experiment` from the -experiment's baseline (`HEAD` at the time the experiment was run). +Creates a new [Git branch] containing the target `experiment` from the experiment's +baseline (`HEAD` at the time the experiment was run). If you don't provide a `branch` name, the default one will be based on the name of the `experiment`. diff --git a/content/docs/command-reference/exp/clean.md b/content/docs/command-reference/exp/clean.md index 08f3549ad8..8176d2131e 100644 --- a/content/docs/command-reference/exp/clean.md +++ b/content/docs/command-reference/exp/clean.md @@ -14,10 +14,10 @@ Runs housekeeping tasks within the DVC repository, such as removing outdated internal experiments queue message files (to reduce disk space and improve performance). -This is done automatically when running [queued experiments]. Running -`dvc exp clean` manually should not be required for typical use cases, but it -may be needed in the event that a queue worker unexpectedly crashed, or was -forcefully killed by something other than DVC commands. +This is done automatically when running [queued experiments]. Running `dvc exp clean` +manually should not be required for typical use cases, but it may be needed in the +event that a queue worker unexpectedly crashed, or was forcefully killed by something +other than DVC commands. [queued experiments]: /doc/user-guide/experiment-management/running-experiments#the-experiments-queue diff --git a/content/docs/command-reference/exp/run.md b/content/docs/command-reference/exp/run.md index 1f2752fcd6..786f40c83f 100644 --- a/content/docs/command-reference/exp/run.md +++ b/content/docs/command-reference/exp/run.md @@ -55,9 +55,9 @@ This includes committing any changed data dependencies to the Use the `--set-param` (`-S`) option as a shortcut to change parameter values [on-the-fly] before running the experiment. 
-It's possible to [queue experiments] for later execution with the `--queue` -flag. Queued experiments can be run with `dvc queue start` and managed with -other `dvc queue` commands. +It's possible to [queue experiments] for later execution with the `--queue` flag. +Queued experiments can be run with `dvc queue start` and managed with other `dvc queue` +commands. @@ -66,9 +66,8 @@ See the [Running Experiments] guide for more details on these features and more. [Review] your experiments with `dvc exp show`. Successful ones can be [made -persistent] by restoring them via `dvc exp branch` or `dvc exp apply` and -committing them to the Git repo. Unnecessary ones can be [cleared] with -`dvc exp remove`. +persistent] by restoring them via `dvc exp branch` or `dvc exp apply` and committing +them to the Git repo. Unnecessary ones can be [cleared] with `dvc exp remove`. [on-the-fly]: #example-modify-parameters-on-the-fly [queue experiments]: @@ -88,14 +87,13 @@ committing them to the Git repo. Unnecessary ones can be [cleared] with default) before running the experiment. Use the optional `[:]` prefix to use a custom params file. - Valid `` values can be defined in Hydra's [basic override] - syntax (see [example](#example-modify-parameters-on-the-fly)). Hydra's - [choice] and [range] sweep overrides are also supported, but these require the - `--queue` flag to be provided as well (see - [example](#example-run-a-grid-search)). + Valid `` values can be defined in Hydra's [basic override] syntax + (see [example](#example-modify-parameters-on-the-fly)). Hydra's [choice] and [range] + sweep overrides are also supported, but these require the `--queue` flag to be + provided as well (see [example](#example-run-a-grid-search)). -- `-n `, `--name ` - specify a [unique name] for this experiment. A - default one will be generated otherwise, such as `puffy-daks`. +- `-n `, `--name ` - specify a [unique name] for this experiment. 
A default + one will be generated otherwise, such as `puffy-daks`. @@ -175,8 +173,8 @@ committing them to the Git repo. Unnecessary ones can be [cleared] with - `--pull` - attempts to download missing data as needed. This includes (1) dependencies of stages to be run, (2) outputs of otherwise unchanged stages to - be skipped, (3) [run cache] for stages to be checked out from cache (unless - `--no-run-cache` is passed). + be skipped, (3) [run cache] for stages to be checked out from cache (unless `--no-run-cache` + is passed). - `--allow-missing` - skip stages with no other changes than missing data. @@ -280,9 +278,9 @@ experiment we just ran (`puffy-daks`). `dvc exp run --set-param` (`-S`) saves you the need to manually edit a params file (see `dvc params`) before running an experiment. -This option accepts Hydra's [basic override] syntax. For example, it can -override (`train.epochs=10`), append (`+train.weight_decay=0.01`), or remove -(`~model.dropout`) parameters: +This option accepts Hydra's [basic override] syntax. For example, it can override +(`train.epochs=10`), append (`+train.weight_decay=0.01`), or remove (`~model.dropout`) +parameters: ```cli dvc exp run -S 'prepare.split=0.1' -S 'featurize.max_features=100' diff --git a/content/docs/command-reference/exp/save.md b/content/docs/command-reference/exp/save.md index 965efdb46b..4035d64966 100644 --- a/content/docs/command-reference/exp/save.md +++ b/content/docs/command-reference/exp/save.md @@ -46,9 +46,8 @@ use `--include-untracked` (`-I`) on untracked files explicitly (see an [Review] your experiments with `dvc exp show`. Successful ones can be [made -persistent] by restoring them via `dvc exp branch` or `dvc exp apply` and -committing them to the Git repo. Unnecessary ones can be [cleared] with -`dvc exp remove`. +persistent] by restoring them via `dvc exp branch` or `dvc exp apply` and committing +them to the Git repo. Unnecessary ones can be [cleared] with `dvc exp remove`. 
[review]: /doc/user-guide/experiment-management/comparing-experiments [made persistent]: @@ -58,8 +57,8 @@ committing them to the Git repo. Unnecessary ones can be [cleared] with ## Options -- `-n `, `--name ` - specify a [unique name] for this experiment. A - default one will be generated otherwise, such as `urban-sign`. +- `-n `, `--name ` - specify a [unique name] for this experiment. A default + one will be generated otherwise, such as `urban-sign`. @@ -123,8 +122,8 @@ Untracked files: We can inspect results with `dvc metrics show` (or other means) after running the experiment (in this case we can do so with `dvc repro` since the example -project uses a [DVC pipeline]). We are not quite ready for a Git commit, but we -want to save the results in the repo nonetheless: +project uses a [DVC pipeline]). We are not quite ready for a Git commit, but we want +to save the results in the repo nonetheless: ```cli dvc exp save --name extra-trees \ diff --git a/content/docs/command-reference/fetch.md b/content/docs/command-reference/fetch.md index 98d03a7f52..73f4b824a8 100644 --- a/content/docs/command-reference/fetch.md +++ b/content/docs/command-reference/fetch.md @@ -160,9 +160,9 @@ The workspace looks like this: └── ``` -This project comes with a predefined HTTP [remote storage]. We can now just run -`dvc fetch` to download the most recent `model.pkl`, `data.xml`, and other -DVC-tracked files into our local cache. +This project comes with a predefined HTTP [remote storage]. We can now just run `dvc fetch` +to download the most recent `model.pkl`, `data.xml`, and other DVC-tracked files +into our local cache. ```cli $ dvc status --cloud diff --git a/content/docs/command-reference/gc.md b/content/docs/command-reference/gc.md index 5c7a8592d7..cb2a297278 100644 --- a/content/docs/command-reference/gc.md +++ b/content/docs/command-reference/gc.md @@ -30,10 +30,10 @@ details. The data kept is determined by reading the DVC files in the set of commits of the given scope. 
-> Note that `dvc gc` tries to fetch missing [`.dir` files] from remote storage -> to local cache in order to determine which files should exist inside cached -> directories. These files may be missing if the cache was previously garbage -> collected, in a newly cloned copy of the repo, etc. +> Note that `dvc gc` tries to fetch missing [`.dir` files] from remote storage to +> local cache in order to determine which files should exist inside cached directories. +> These files may be missing if the cache was previously garbage collected, in a +> newly cloned copy of the repo, etc. Unless the `--cloud` option is used, any files collected from the cache can be restored using `dvc fetch`, as long as they have been previously uploaded with @@ -112,14 +112,14 @@ project we want to clear. - `--date ` - Keep experiments from any commits on of after a certain date. Argument `` expects a date in the [ISO 8601] format. -- `--all-experiments` keep cached objects referenced in all [DVC experiments], - as well as in the workspace (implying `-w`). This preserves the project's - [experimental] data. +- `--all-experiments` keep cached objects referenced in all [DVC experiments], as + well as in the workspace (implying `-w`). This preserves the project's [experimental] + data. -- `-p `, `--projects ` - if a single remote or a single [cache is - shared] among different projects, this option can be used to specify a list of - them (each project is a path) to keep data that is currently referenced from - them. +- `-p `, `--projects ` - if a single remote or a single [cache + is shared] among different projects, this option can be used to specify a list + of them (each project is a path) to keep data that is currently referenced + from them. - `--not-in-remote` - keep cached objects that are _not_ in the remote. 
This will remove the objects from the local cache that have been pushed and are diff --git a/content/docs/command-reference/get.md b/content/docs/command-reference/get.md index 2391f5369d..a12b499799 100644 --- a/content/docs/command-reference/get.md +++ b/content/docs/command-reference/get.md @@ -118,10 +118,9 @@ model.pkl ``` Note that the `model.pkl` file doesn't actually exist in the [root directory] of -the source Git repo. Instead, it's exported in the `dvc.yaml` file as an -output of the `train` stage (in the `outs` field). DVC will then -`dvc pull` the file from the `dvc remote default` of the source DVC project -(found in [its config file]). +the source Git repo. Instead, it's exported in the `dvc.yaml` file as an output +of the `train` stage (in the `outs` field). DVC will then `dvc pull` the file from +the `dvc remote default` of the source DVC project (found in [its config file]). [root directory]: https://github.com/iterative/example-get-started/tree/master/ [its config file]: @@ -176,12 +175,11 @@ file hash. the file or directory from. It also has the `--out` option to specify the location to place the target data within the workspace. Combining these two options allows us to do something we can't achieve with the regular -`git checkout` + `dvc checkout` process – see for example the [Switching between -versions] chapter of our _Get Started_. +`git checkout` + `dvc checkout` process – see for example the [Switching +between versions] chapter of our _Get Started_. -Let's use the [get started example repo] again, like in the previous example. -But this time, clone it first to see `dvc get` in action inside a DVC -project. +Let's use the [get started example repo] again, like in the previous example. But +this time, clone it first to see `dvc get` in action inside a DVC project. 
```cli $ git clone https://github.com/iterative/example-get-started diff --git a/content/docs/command-reference/import-url.md b/content/docs/command-reference/import-url.md index 396571afbf..586bebd678 100644 --- a/content/docs/command-reference/import-url.md +++ b/content/docs/command-reference/import-url.md @@ -100,10 +100,10 @@ DVC supports several types of external locations (protocols): If you installed DVC via `pip` and plan to use cloud services as [remote -storage], you might need to install these optional dependencies: `[s3]`, -`[azure]`, `[gs]`, `[oss]`, `[ssh]`. Alternatively, use `[all]` to include them -all. The command should look like this: `pip install "dvc[s3]"`. (This example -installs `boto3` library along with DVC to support S3 storage.) +storage], you might need to install these optional dependencies: `[s3]`, `[azure]`, +`[gs]`, `[oss]`, `[ssh]`. Alternatively, use `[all]` to include them all. The command +should look like this: `pip install "dvc[s3]"`. (This example installs `boto3` library +along with DVC to support S3 storage.) @@ -116,10 +116,9 @@ installs `boto3` library along with DVC to support S3 storage.) [ETag](https://en.wikipedia.org/wiki/HTTP_ETag#Strong_and_weak_validation) is necessary to track if the specified URL changed. -DVC also supports capturing [cloud versioning] information from certain cloud -storage providers. When the `--version-aware` option is provided or when the -`url` argument includes a supported cloud versioning ID, DVC will import the -specified version. +DVC also supports capturing [cloud versioning] information from certain cloud storage +providers. When the `--version-aware` option is provided or when the `url` argument +includes a supported cloud versioning ID, DVC will import the specified version. [cloud versioning]: /doc/user-guide/data-management/cloud-versioning @@ -201,11 +200,10 @@ produces a regular stage in `dvc.yaml`. - `--fs-config =` - `dvc remote` config options for the target url. 
-- `--version-aware` - capture [cloud versioning] information of the current - version when importing the file. DVC will always - [pull](/doc/command-reference/pull) the versioned data from the source and - will not [push](/doc/command-reference/push) an additional copy to remote - storage. +- `--version-aware` - capture [cloud versioning] information of the current version + when importing the file. DVC will always [pull](/doc/command-reference/pull) the + versioned data from the source and will not [push](/doc/command-reference/push) + an additional copy to remote storage. - `-h`, `--help` - prints the usage/help message, and exit. @@ -238,8 +236,8 @@ $ git checkout 3-config-remote ## Example: Tracking a file from the web -An advanced alternate to the intro of the [Versioning Basics] part of the _Get -Started_ is to use `dvc import-url`: +An advanced alternate to the intro of the [Versioning Basics] part of the _Get Started_ +is to use `dvc import-url`: ```cli $ dvc import-url https://data.dvc.org/get-started/data.xml \ diff --git a/content/docs/command-reference/import.md b/content/docs/command-reference/import.md index 03f5668c3c..98ae814d2b 100644 --- a/content/docs/command-reference/import.md +++ b/content/docs/command-reference/import.md @@ -43,8 +43,8 @@ e.g. `data.txt.dvc` – similar to using `dvc add` after downloading the data. -DVC won't push data imported from other DVC repos to [remote storage]. -`dvc pull` will download from the original source. +DVC won't push data imported from other DVC repos to [remote storage]. `dvc pull` +will download from the original source. [remote storage]: /doc/user-guide/data-management/remote-storage diff --git a/content/docs/command-reference/init.md b/content/docs/command-reference/init.md index 0ba868dbc8..feec956f7a 100644 --- a/content/docs/command-reference/init.md +++ b/content/docs/command-reference/init.md @@ -134,8 +134,8 @@ revisions to compare. 
DVC sets the `core.no_scm` config option value to `true` in the [DVC configuration] when initialized this way. This means that even if the project is -tracked by Git, or if Git is initialized in it later, DVC will keep operating -detached from Git in this project. +tracked by Git, or if Git is initialized in it later, DVC will keep operating detached +from Git in this project. [dvc configuration]: /doc/user-guide/project-structure/configuration diff --git a/content/docs/command-reference/move.md b/content/docs/command-reference/move.md index 3b5394f0aa..088acf5684 100644 --- a/content/docs/command-reference/move.md +++ b/content/docs/command-reference/move.md @@ -92,8 +92,8 @@ $ mv keras.h5 model.h5 Often the output of a stage is a dependency in another stage, creating a -[dependency graph]. In this case, you may want to also update the `path` in the -`deps` field of `dvc.yaml`. +[dependency graph]. In this case, you may want to also update the `path` in the `deps` +field of `dvc.yaml`. [dependency graph]: /doc/user-guide/pipelines/defining-pipelines diff --git a/content/docs/command-reference/params/index.md b/content/docs/command-reference/params/index.md index a33d699ef9..bdf6082e25 100644 --- a/content/docs/command-reference/params/index.md +++ b/content/docs/command-reference/params/index.md @@ -61,9 +61,8 @@ Multiple stages of a pipeline can [use the same params file] as stage. Parameters can also be used for [templating] `dvc.yaml` itself (see also **Dict -Unpacking**), which means you can pass them to your [stage commands] as -command-line arguments. You can also load them in Python code with -`dvc.api.params_show()`. +Unpacking**), which means you can pass them to your [stage commands] as command-line +arguments. You can also load them in Python code with `dvc.api.params_show()`. The `dvc params diff` command is available to show parameter changes, displaying their current and previous values. 
diff --git a/content/docs/command-reference/plots/diff.md b/content/docs/command-reference/plots/diff.md index 5bc0023102..1f253e9e72 100644 --- a/content/docs/command-reference/plots/diff.md +++ b/content/docs/command-reference/plots/diff.md @@ -20,9 +20,8 @@ positional arguments: ## Description -This command is a way to visualize the "difference" between [certain metrics] -among versions of the repository, by overlaying them in a single -plot. +This command is a way to visualize the "difference" between [certain metrics] among +versions of the repository, by overlaying them in a single plot. > Note that unlike `dvc metrics diff`, this command does not calculate numeric > differences between plots file values. @@ -38,8 +37,8 @@ all of them in a single image). All plots defined in `dvc.yaml` are used by default, but specific files can be specified with the `--targets` option (any valid plots file is accepted). -The plot style can be customized with [plot templates], using the `--template` -option. See `dvc plots` to learn more about plots files and templates. +The plot style can be customized with [plot templates], using the `--template` option. +See `dvc plots` to learn more about plots files and templates. Another way to display plots is the `dvc plots show` command, which just lists all the current plots, without comparisons. diff --git a/content/docs/command-reference/plots/show.md b/content/docs/command-reference/plots/show.md index a58ad8a177..1c01f250c0 100644 --- a/content/docs/command-reference/plots/show.md +++ b/content/docs/command-reference/plots/show.md @@ -19,16 +19,15 @@ positional arguments: ## Description -This command provides a quick way to visualize [certain data] such as loss -functions, AUC curves, confusion matrices, etc. +This command provides a quick way to visualize [certain data] such as loss functions, +AUC curves, confusion matrices, etc. 
All plots defined in `dvc.yaml` are used by default, but you can specify any -`targets`, which can be plots files or non-file [plot IDs] defined anywhere in -`dvc.yaml`, or any other files (they don't necessarily have to be defined in -`dvc.yaml`). +`targets`, which can be plots files or non-file [plot IDs] defined anywhere in `dvc.yaml`, +or any other files (they don't necessarily have to be defined in `dvc.yaml`). -The plot style can be customized with [plot templates], using the `--template` -option. To learn more about plots file formats and templates, see `dvc plots`. +The plot style can be customized with [plot templates], using the `--template` option. +To learn more about plots file formats and templates, see `dvc plots`. [certain data]: /doc/user-guide/experiment-management/visualizing-plots#supported-plot-file-formats diff --git a/content/docs/command-reference/plots/templates.md b/content/docs/command-reference/plots/templates.md index bcaae84457..38635fb94e 100644 --- a/content/docs/command-reference/plots/templates.md +++ b/content/docs/command-reference/plots/templates.md @@ -18,10 +18,9 @@ positional arguments: By default, lists the names of all available built-in templates. Sometimes you may need to customize the way `dvc plots` are rendered beyond what -the built-in [plot templates] allow. You can get the JSON specification for a -specific built-in template by providing it's name as argument, for example -`dvc plots templates confusion`. To modify them, use any valid elements of the -[Vega-Lite specification]. +the built-in [plot templates] allow. You can get the JSON specification for a specific +built-in template by providing its name as an argument, for example `dvc plots templates confusion`. +To modify them, use any valid elements of the [Vega-Lite specification]. 
diff --git a/content/docs/command-reference/pull.md b/content/docs/command-reference/pull.md index 8418f599c8..8132d2c6e5 100644 --- a/content/docs/command-reference/pull.md +++ b/content/docs/command-reference/pull.md @@ -1,8 +1,7 @@ # pull Download tracked files or directories from [remote storage] based on the current -`dvc.yaml` and `.dvc` files, and make them visible in the -workspace. +`dvc.yaml` and `.dvc` files, and make them visible in the workspace. [remote storage]: /doc/user-guide/data-management/remote-storage @@ -22,11 +21,11 @@ positional arguments: ## Description The `dvc push` and `dvc pull` commands are the means for uploading and -downloading data to and from [remote storage] (S3, SSH, GCS, etc.). These -commands are similar to `git push` and `git pull`, respectively. [Data sharing] -across environments and preserving data versions (input datasets, intermediate -results, models, `dvc metrics`, etc.) remotely are the most common use cases for -these commands. +downloading data to and from [remote storage] (S3, SSH, GCS, etc.). These commands +are similar to `git push` and `git pull`, respectively. [Data sharing] across +environments and preserving data versions (input datasets, intermediate results, +models, `dvc metrics`, etc.) remotely are the most common use cases for these +commands. `dvc pull` downloads tracked data from a `dvc remote` to the cache, and links (or copies) the files or directories to the workspace @@ -118,8 +117,8 @@ used to see what files `dvc pull` would download. - `--run-cache`, `--no-run-cache` - downloads all available history of [stage runs] from the `dvc remote` (to the cache only, like `dvc fetch --run-cache`). - Note that `dvc repro ` is necessary to checkout these files (into - the workspace) and update `dvc.lock`. + Note that `dvc repro ` is necessary to checkout these files (into the + workspace) and update `dvc.lock`. 
- `--allow-missing` - allows the command to succeed even if some files or directories are missing. diff --git a/content/docs/command-reference/push.md b/content/docs/command-reference/push.md index 7b300d12e1..23cf0c119d 100644 --- a/content/docs/command-reference/push.md +++ b/content/docs/command-reference/push.md @@ -1,7 +1,7 @@ # push -Upload tracked files or directories to [remote storage] based on the current -dvc files files. +Upload tracked files or directories to [remote storage] based on the current dvc +files files. [remote storage]: /doc/user-guide/data-management/remote-storage @@ -21,9 +21,9 @@ positional arguments: ## Description The `dvc push` and `dvc pull` commands are the means for uploading and -downloading data to and from [remote storage] (S3, SSH, GCS, etc.). These -commands are similar to `git push` and `git pull`, respectively. [Data sharing] -across environments, and preserving data versions (input datasets, intermediate +downloading data to and from [remote storage] (S3, SSH, GCS, etc.). These commands +are similar to `git push` and `git pull`, respectively. [Data sharing] across +environments, and preserving data versions (input datasets, intermediate results, models, `dvc metrics`, etc.) remotely are the most common use cases for these commands. @@ -153,8 +153,7 @@ a [pipeline](/doc/command-reference/dag) has been set up with these `matrix-train` Imagine the project has been modified such that the -outputs of some of these stages need to be uploaded to [remote -storage]. +outputs of some of these stages need to be uploaded to [remote storage]. ```cli $ dvc status --cloud @@ -193,10 +192,9 @@ Finally, we used `dvc status` to double check that all data had been uploaded. https://www.youtube.com/watch?v=FYmmiAz81G4 -Let's take a detailed look at what happens to the [cache directory] as you run -an experiment locally and push data to remote storage. 
To set the example -consider having created a project with some code, data, and a -`dvc remote` setup. +Let's take a detailed look at what happens to the [cache directory] as you run an +experiment locally and push data to remote storage. To set the example consider having +created a project with some code, data, and a `dvc remote` setup. Some work has been performed in the workspace, and new data is ready for uploading to the remote. `dvc status --cloud` will list several files in `new` diff --git a/content/docs/command-reference/queue/index.md b/content/docs/command-reference/queue/index.md index 6bb18f4a32..cc97ea86bb 100644 --- a/content/docs/command-reference/queue/index.md +++ b/content/docs/command-reference/queue/index.md @@ -1,11 +1,8 @@ # queue -A set of commands to manage the [DVC experiments] task queue: -[start](/doc/command-reference/queue/start), -[stop](/doc/command-reference/queue/stop), -[status](/doc/command-reference/queue/status), -[logs](/doc/command-reference/queue/logs), -[remove](/doc/command-reference/queue/remove), +A set of commands to manage the [DVC experiments] task queue: [start](/doc/command-reference/queue/start), +[stop](/doc/command-reference/queue/stop), [status](/doc/command-reference/queue/status), +[logs](/doc/command-reference/queue/logs), [remove](/doc/command-reference/queue/remove), [kill](/doc/command-reference/queue/kill) [dvc experiments]: /doc/user-guide/experiment-management diff --git a/content/docs/command-reference/queue/start.md b/content/docs/command-reference/queue/start.md index 7228034e9a..2bf4850cd5 100644 --- a/content/docs/command-reference/queue/start.md +++ b/content/docs/command-reference/queue/start.md @@ -19,9 +19,9 @@ until either `dvc queue stop` is used or the queue is empty. -Due to [internal limitations], when the queue is empty a worker may be idle for -up to 10 seconds before exiting. If new experiment tasks are added to the queue -during this time, workers will resume processing them instead. 
+Due to [internal limitations], when the queue is empty a worker may be idle for up +to 10 seconds before exiting. If new experiment tasks are added to the queue during +this time, workers will resume processing them instead. [internal limitations]: /doc/user-guide/experiment-management/running-experiments#how-are-experiments-queued diff --git a/content/docs/command-reference/queue/stop.md b/content/docs/command-reference/queue/stop.md index 33cc2025e5..8acaa19d3d 100644 --- a/content/docs/command-reference/queue/stop.md +++ b/content/docs/command-reference/queue/stop.md @@ -1,7 +1,7 @@ ## queue stop -Stop running queued [DVC experiments] (see `dvc queue start`) after the current -ones are finished running. +Stop running queued [DVC experiments] (see `dvc queue start`) after the current ones +are finished running. [dvc experiments]: /doc/user-guide/experiment-management diff --git a/content/docs/command-reference/remote/add.md b/content/docs/command-reference/remote/add.md index 34bfdf1edf..c687648b13 100644 --- a/content/docs/command-reference/remote/add.md +++ b/content/docs/command-reference/remote/add.md @@ -27,10 +27,10 @@ positional arguments: ## Description -Registers a [remote storage] location to save data files (besides the -cache) and optionally sets it as the `--default` remote. DVC -remotes can point to a cloud storage service, an SSH server, network-attached -storage, or even a directory in the local file system. +Registers a [remote storage] location to save data files (besides the cache) +and optionally sets it as the `--default` remote. DVC remotes can point to a cloud +storage service, an SSH server, network-attached storage, or even a directory in +the local file system. 
[remote storage]: /doc/user-guide/data-management/remote-storage @@ -72,10 +72,9 @@ $ dvc remote add -d temp /tmp/dvcstore -If you [installed DVC] via `pip` and plan to use cloud services as remote -storage, you might need to install these optional dependencies: `[s3]`, -`[azure]`, `[gdrive]`, `[gs]`, `[oss]`, `[ssh]`. Use `[all]` to include them -all. For example: +If you [installed DVC] via `pip` and plan to use cloud services as remote storage, +you might need to install these optional dependencies: `[s3]`, `[azure]`, `[gdrive]`, +`[gs]`, `[oss]`, `[ssh]`. Use `[all]` to include them all. For example: ```cli $ pip install "dvc[s3]" diff --git a/content/docs/command-reference/remote/index.md b/content/docs/command-reference/remote/index.md index 7ddfdd9280..72b1b6845a 100644 --- a/content/docs/command-reference/remote/index.md +++ b/content/docs/command-reference/remote/index.md @@ -1,12 +1,9 @@ # remote -A set of commands to set up and manage [remote storage]: -[add](/doc/command-reference/remote/add), -[default](/doc/command-reference/remote/default), -[list](/doc/command-reference/remote/list), -[modify](/doc/command-reference/remote/modify), -[remove](/doc/command-reference/remote/remove), and -[rename](/doc/command-reference/remote/rename). +A set of commands to set up and manage [remote storage]: [add](/doc/command-reference/remote/add), +[default](/doc/command-reference/remote/default), [list](/doc/command-reference/remote/list), +[modify](/doc/command-reference/remote/modify), [remove](/doc/command-reference/remote/remove), +and [rename](/doc/command-reference/remote/rename). [remote storage]: /doc/user-guide/data-management/remote-storage @@ -41,8 +38,8 @@ Learn more about [remote storage]. -`dvc remote` subcommands read or modify DVC [config files] (`.dvc/config` by -default). Alternatively, the config files can be edited manually. +`dvc remote` subcommands read or modify DVC [config files] (`.dvc/config` by default). 
+Alternatively, the config files can be edited manually. [types of storage]: /doc/user-guide/data-management/remote-storage#supported-storage-types diff --git a/content/docs/command-reference/remote/list.md b/content/docs/command-reference/remote/list.md index 408bf64b9c..5d23781f1c 100644 --- a/content/docs/command-reference/remote/list.md +++ b/content/docs/command-reference/remote/list.md @@ -11,9 +11,9 @@ usage: dvc remote list [-h] [--global | --system | --project | --local] ## Description -Reads [DVC configuration] and prints the list of available remotes, including -their names and URLs/paths. Remotes are read from the system, global, project, -and local config files (in that order). +Reads [DVC configuration] and prints the list of available remotes, including their +names and URLs/paths. Remotes are read from the system, global, project, and local +config files (in that order). [dvc configuration]: /doc/user-guide/project-structure/configuration#remote diff --git a/content/docs/command-reference/remote/modify.md b/content/docs/command-reference/remote/modify.md index fb99531e1b..78c2d1063b 100644 --- a/content/docs/command-reference/remote/modify.md +++ b/content/docs/command-reference/remote/modify.md @@ -46,10 +46,9 @@ $ dvc remote modify temp url /mnt/c/tmp/dvcstore -If you [installed DVC] via `pip` and plan to use cloud services as remote -storage, you might need to install these optional dependencies: `[s3]`, -`[azure]`, `[gdrive]`, `[gs]`, `[oss]`, `[ssh]`. Use `[all]` to include them -all. For example: +If you [installed DVC] via `pip` and plan to use cloud services as remote storage, +you might need to install these optional dependencies: `[s3]`, `[azure]`, `[gdrive]`, +`[gs]`, `[oss]`, `[ssh]`. Use `[all]` to include them all. 
For example: ```cli $ pip install "dvc[s3]" diff --git a/content/docs/command-reference/remote/remove.md b/content/docs/command-reference/remote/remove.md index 3d38fecf9f..217d1dcad8 100644 --- a/content/docs/command-reference/remote/remove.md +++ b/content/docs/command-reference/remote/remove.md @@ -4,8 +4,8 @@ Remove a `dvc remote`. -This command affects [DVC configuration] files only. It does not physically -remove data files stored remotely. See `dvc gc --cloud` for that. +This command affects [DVC configuration] files only. It does not physically remove +data files stored remotely. See `dvc gc --cloud` for that. @@ -23,8 +23,8 @@ positional arguments: ## Description -This command removes a section in the [DVC configuration] file. Alternatively, -it is possible to edit config files manually. +This command removes a section in the [DVC configuration] file. Alternatively, it +is possible to edit config files manually. The `name` argument is required. diff --git a/content/docs/command-reference/repro.md b/content/docs/command-reference/repro.md index 4a13fe7c73..7b4545e2ba 100644 --- a/content/docs/command-reference/repro.md +++ b/content/docs/command-reference/repro.md @@ -41,8 +41,8 @@ For stages with multiple commands (having a list in the `cmd` field), commands are run one after the other in the order they are defined. The failure of any command will halt the remaining stage execution and raise an error. -Stages without dependencies nor outputs are considered [always changed], so -`dvc repro` always runs them. +Stages without dependencies nor outputs are considered [always changed], so `dvc repro` +always runs them. @@ -159,8 +159,8 @@ final stage. option, as all possible targets are already included. - `--no-run-cache` - execute stage command(s) even if they have already been run - with the same dependencies and outputs (see the [run cache]). Useful for - example if the stage command/s is/are non-deterministic ([not recommended]). 
+ with the same dependencies and outputs (see the [run cache]). Useful for example + if the stage command/s is/are non-deterministic ([not recommended]). - `--force-downstream` - in cases like `... -> A (changed) -> B -> C` it will reproduce `A` first and then `B`, even if `B` was previously executed with the @@ -184,8 +184,8 @@ final stage. - `--pull` - attempts to download missing data as needed. This includes (1) dependencies of stages to be run, (2) outputs of otherwise unchanged stages to - be skipped, (3) [run cache] for stages to be checked out from cache (unless - `--no-run-cache` is passed). + be skipped, (3) [run cache] for stages to be checked out from cache (unless `--no-run-cache` + is passed). - `--allow-missing` - skip stages with no other changes than missing data. diff --git a/content/docs/command-reference/stage/add.md b/content/docs/command-reference/stage/add.md index 0cb436d7a9..530f358350 100644 --- a/content/docs/command-reference/stage/add.md +++ b/content/docs/command-reference/stage/add.md @@ -73,19 +73,19 @@ is reproduced (see also `dvc gc`). Relevant notes: source code changes, DVC knows that the stage needs to be reproduced. (You can chose whether to do this.) -- `dvc stage add` checks the [dependency graph] integrity before creating a new - stage. For example: two stage cannot specify the same output or overlapping - output paths, there should be no cycles, etc. +- `dvc stage add` checks the [dependency graph] integrity before creating a new stage. + For example: two stages cannot specify the same output or overlapping output paths, + there should be no cycles, etc. - DVC does not feed dependency files to the command being run. The program will have to read the files itself. - Entire directories produced by the stage can be tracked as outputs by DVC, - which generates a single `.dir` entry in the cache (refer to [Structure of - cache directory] for more info.) 
+ which generates a single `.dir` entry in the cache (refer to [Structure + of cache directory] for more info.) -- [external dependencies and outputs] (outside of the workspace) - are also supported (except metrics and plots). +- [external dependencies and outputs] (outside of the workspace) are + also supported (except metrics and plots). - Since outputs are deleted from the workspace before executing stage commands, the underlying code should create any directory structures @@ -95,8 +95,8 @@ is reproduced (see also `dvc gc`). Relevant notes: some of the dependencies or outputs are missing from `dvc.yaml`. It is possible to [add them to an existing stage]. -- Renaming dependencies or outputs requires a [manual process] to update - `dvc.yaml` and the project's cache accordingly. +- Renaming dependencies or outputs requires a [manual process] to update `dvc.yaml` + and the project's cache accordingly. [add them to an existing stage]: /docs/user-guide/how-to/add-deps-or-outs-to-a-stage @@ -397,8 +397,8 @@ We use [ruamel.yaml](https://pypi.org/project/ruamel.yaml/) which supports YAML -You can also [use templating] to parse parameters directly from `params.yaml` -into the stage. +You can also [use templating] to parse parameters directly from `params.yaml` into +the stage. [use templating]: /doc/user-guide/project-structure/dvcyaml-files#templating diff --git a/content/docs/command-reference/status.md b/content/docs/command-reference/status.md index 3186e26c64..9a7d1b5850 100644 --- a/content/docs/command-reference/status.md +++ b/content/docs/command-reference/status.md @@ -32,8 +32,8 @@ Searches for changes in the existing tracked data and pipelines. In local mode, it shows which files or directories have changed in the workspace (thus could be [added](/doc/command-reference/add) or [reproduced](/doc/command-reference/repro) again). In remote mode, it reports -the differences between cache vs. 
[remote storage] (`dvc push` or -`dvc pull` could be used to synchronize these). +the differences between cache vs. [remote storage] (`dvc push` or `dvc pull` +could be used to synchronize these). | Mode | Option | Description | | ------ | ----------------- | --------------------------------------------------------------------------------------------------------------------------- | @@ -99,8 +99,8 @@ detailed bellow. - _missing_ means that the file/directory doesn't exist neither in cache, nor in remote storage. -For _new_ and _deleted_ data, the cache is different from [remote storage]. -Bringing the two into sync requires `dvc pull` or `dvc push`. +For _new_ and _deleted_ data, the cache is different from [remote storage]. Bringing +the two into sync requires `dvc pull` or `dvc push`. For _missing_ data, there's nothing to retrieve from storage. This can happen for example in fresh DVC repository clones if the data wasn't diff --git a/content/docs/command-reference/studio/index.md b/content/docs/command-reference/studio/index.md index d76d0ea89e..09d1e4bcfb 100644 --- a/content/docs/command-reference/studio/index.md +++ b/content/docs/command-reference/studio/index.md @@ -3,8 +3,7 @@ A set of commands to authenticate DVC with [Studio](https://studio.iterative.ai) and save a [client access token](/doc/studio/user-guide/account-management#client-access-tokens) -to global [DVC configuration]: [login](/doc/command-reference/studio/login), -[logout](/doc/command-reference/studio/logout), +to global [DVC configuration]: [login](/doc/command-reference/studio/login), [logout](/doc/command-reference/studio/logout), [token](/doc/command-reference/studio/token). 
[dvc configuration]: diff --git a/content/docs/command-reference/update.md b/content/docs/command-reference/update.md index 709cde4274..e81a5eba1a 100644 --- a/content/docs/command-reference/update.md +++ b/content/docs/command-reference/update.md @@ -41,9 +41,9 @@ $ dvc update --rev master ## Options -- `--rev ` - commit hash, branch or tag name, etc. (any [Git revision]) - of the repository to update the file or directory from. The latest commit (in - the default branch) is used by default. +- `--rev ` - commit hash, branch or tag name, etc. (any [Git + revision]) of the repository to update the file or directory from. The latest commit + (in the default branch) is used by default. For data obtained with `dvc import-url --version-aware`, this option can be used to specify an object version ID. By default, the current version from diff --git a/content/docs/contributing/docs.md b/content/docs/contributing/docs.md index 81eecda454..347d3ec11d 100644 --- a/content/docs/contributing/docs.md +++ b/content/docs/contributing/docs.md @@ -55,8 +55,8 @@ formatted and linted as well, which is also ensured by the full setup below. Make sure you have a recent LTS version of [Node.js](https://nodejs.org/en/) (`>=18.0.0`, `<=20.x`), and install [Yarn](https://yarnpkg.com/): -> In Windows, you may need to install [Visual Studio Build Tools], and the -> [Windows SDK] first. +> In Windows, you may need to install [Visual Studio Build Tools], and the +> [Windows SDK] first. [windows sdk]: https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/ diff --git a/content/docs/dvclive/index.md b/content/docs/dvclive/index.md index 3ea84f6eef..bcce3119a8 100644 --- a/content/docs/dvclive/index.md +++ b/content/docs/dvclive/index.md @@ -162,9 +162,9 @@ analysis and comparison. Experimenting in Python interactively (like in notebooks) is great for exploration, but eventually you may need a more structured way to run reproducible experiments. 
By configuring DVC pipelines, you can -[run experiments] with `dvc exp run`. Pipelines help you organize your ML -workflow beyond a single notebook or script so you can modularize and -parametrize your code. See how to [setup a pipeline] to work with DVCLive. +[run experiments] with `dvc exp run`. Pipelines help you organize your ML workflow +beyond a single notebook or script so you can modularize and parametrize your code. +See how to [setup a pipeline] to work with DVCLive. [release notes]: https://github.com/iterative/dvclive/releases/tag/3.0.0 [directory]: /doc/dvclive/how-it-works diff --git a/content/docs/dvclive/install.md b/content/docs/dvclive/install.md index d948a4fb98..7ae4515012 100644 --- a/content/docs/dvclive/install.md +++ b/content/docs/dvclive/install.md @@ -10,8 +10,8 @@ Note that Python 3.8+ is needed to get the latest version of DVClive. $ pip install dvclive ``` -Depending on the type of the [DVClive methods] you plan to use, you might need -to install optional dependencies: +Depending on the type of the [DVClive methods] you plan to use, you might need to +install optional dependencies: - install `[image]` to use [`log_image`] - install `[plots]` to use [`log_plot`] @@ -19,9 +19,9 @@ to install optional dependencies: - install `[markdown]` to use [`make_report`] when `report=md` or `report=notebook` -If you use one of the supported [ML frameworks], you can also install the -optional dependencies: `[huggingface]`, `[lightning]`, `[tf]`, `[fastai]`, -`[optuna]`, `[xgb]`, `[catalyst]`, `[lgbm]`, `[mmcv]`. +If you use one of the supported [ML frameworks], you can also install the optional +dependencies: `[huggingface]`, `[lightning]`, `[tf]`, `[fastai]`, `[optuna]`, `[xgb]`, +`[catalyst]`, `[lgbm]`, `[mmcv]`. Use `[all]` to include them all. 
diff --git a/content/docs/dvclive/live/log_param.md b/content/docs/dvclive/live/log_param.md index d9d16b216a..b3580eca32 100644 --- a/content/docs/dvclive/live/log_param.md +++ b/content/docs/dvclive/live/log_param.md @@ -34,8 +34,8 @@ The logged params can be visualized with `dvc params`: $ dvc params diff dvclive/params.yaml ``` -If you use DVC pipelines, [parameter dependencies] are tracked -automatically, and you can skip logging them with DVCLive. +If you use DVC pipelines, [parameter dependencies] are tracked automatically, +and you can skip logging them with DVCLive. diff --git a/content/docs/dvclive/live/log_params.md b/content/docs/dvclive/live/log_params.md index 42115d038f..487dd9b7f9 100644 --- a/content/docs/dvclive/live/log_params.md +++ b/content/docs/dvclive/live/log_params.md @@ -47,8 +47,8 @@ The logged params can be visualized with `dvc params`: dvc params diff dvclive/params.yaml ``` -If you use DVC pipelines, [parameter dependencies] are tracked -automatically, and you can skip logging them with DVCLive. +If you use DVC pipelines, [parameter dependencies] are tracked automatically, +and you can skip logging them with DVCLive. diff --git a/content/docs/dvclive/ml-frameworks/pytorch-lightning.md b/content/docs/dvclive/ml-frameworks/pytorch-lightning.md index 8ac01ff4a6..095683ed87 100644 --- a/content/docs/dvclive/ml-frameworks/pytorch-lightning.md +++ b/content/docs/dvclive/ml-frameworks/pytorch-lightning.md @@ -93,8 +93,8 @@ checkpointing at all as described in the Use `log_model` to save the checkpoints (it will use `Live.log_artifact()` internally to save those). At the end of training, DVCLive will copy the [`best_model_path`][`ModelCheckpoint`] to the `dvclive/artifacts` directory and -annotate it with name `best` (for example, to be consumed in [DVC Studio] -model registry or automation scenarios). +annotate it with name `best` (for example, to be consumed in [DVC Studio] model +registry or automation scenarios). 
- Save updates to the checkpoints directory at the end of training: diff --git a/content/docs/gto/command-reference/show.md b/content/docs/gto/command-reference/show.md index 87b9af9d27..0a267f1899 100644 --- a/content/docs/gto/command-reference/show.md +++ b/content/docs/gto/command-reference/show.md @@ -146,6 +146,5 @@ Model Registries work. for each version. -1 for all [default: -1] - `--vs `, `--versions-per-stage ` - Show N last versions for each stage. -1 for all. Applied after 'assignments-per-version' [default: 1] -- `--sort ` - Order assignments by timestamp or semver [default: - timestamp] +- `--sort ` - Order assignments by timestamp or semver [default: timestamp] - `-h`, `--help` - Show this message and exit. diff --git a/content/docs/gto/get-started.md b/content/docs/gto/get-started.md index 383d1d434a..62a0570e2e 100644 --- a/content/docs/gto/get-started.md +++ b/content/docs/gto/get-started.md @@ -101,8 +101,8 @@ You'll need a [GitHub account](https://github.com/signup)) for this. -1. [Fork the example repo]. Make sure you uncheck "Copy the `main` branch only" - to preserve the repo's tags. +1. [Fork the example repo]. Make sure you uncheck "Copy the `main` branch only" to + preserve the repo's tags. 2. Enable the [workflows] in your fork's **Settings** -> **Actions** page. Now its [preconfigured jobs] will trigger when Git tags are pushed. diff --git a/content/docs/gto/install.md b/content/docs/gto/install.md index cf38da7ec7..2b3eaa52ba 100644 --- a/content/docs/gto/install.md +++ b/content/docs/gto/install.md @@ -14,8 +14,8 @@ it with a package manager like [pip](https://pypi.org/project/pip/) or -We **strongly** recommend creating a [virtual environment] or using [pipx] to -encapsulate your local environment. +We **strongly** recommend creating a [virtual environment] or using [pipx] to encapsulate +your local environment. 
[virtual environment]: https://python.readthedocs.io/en/stable/library/venv.html [pipx]: diff --git a/content/docs/index.md b/content/docs/index.md index b867a0c041..0281a2311b 100644 --- a/content/docs/index.md +++ b/content/docs/index.md @@ -8,8 +8,8 @@ projects reproducible, and collaborate better. -**DVC** can be installed on [Visual Studio Code], any [system terminal], and -used as a [Python library]. +**DVC** can be installed on [Visual Studio Code], any [system terminal], and used +as a [Python library]. [visual studio code]: /doc/vs-code-extension [system terminal]: /doc/install diff --git a/content/docs/install/ide-plugins.md b/content/docs/install/ide-plugins.md index 9a613a5104..710c747163 100644 --- a/content/docs/install/ide-plugins.md +++ b/content/docs/install/ide-plugins.md @@ -14,8 +14,7 @@ Install the [DVC Extension] for VS Code to use DVC right from your IDE! -Enable enhanced [IntelliSense] on `dvc.yaml` files by installing the [YAML -extension]. +Enable enhanced [IntelliSense] on `dvc.yaml` files by installing the [YAML extension]. To make `dvc.lock` and `.dvc` files recognized as YAML, add this to `settings.json`: diff --git a/content/docs/install/linux.md b/content/docs/install/linux.md index 812837d6d3..b578f557f6 100644 --- a/content/docs/install/linux.md +++ b/content/docs/install/linux.md @@ -30,8 +30,8 @@ $ pip install dvc ``` Depending on the type of the [remote storage] you plan to use, you might need to -install optional dependencies: `[s3]`, `[gdrive]`, `[gs]`, `[azure]`, `[ssh]`, -`[hdfs]`, `[webdav]`, `[oss]`. Use `[all]` to include them all. +install optional dependencies: `[s3]`, `[gdrive]`, `[gs]`, `[azure]`, `[ssh]`, `[hdfs]`, +`[webdav]`, `[oss]`. Use `[all]` to include them all. 
[remote storage]: /doc/user-guide/data-management/remote-storage @@ -67,8 +67,8 @@ $ mamba install -c conda-forge dvc ``` Depending on the type of the [remote storage] you plan to use, you might need to -install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, -`dvc-oss`, `dvc-ssh`. +install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, `dvc-oss`, +`dvc-ssh`.
diff --git a/content/docs/install/macos.md b/content/docs/install/macos.md index fc71343714..55697d297f 100644 --- a/content/docs/install/macos.md +++ b/content/docs/install/macos.md @@ -59,8 +59,8 @@ $ pip install dvc ``` Depending on the type of the [remote storage] you plan to use, you might need to -install optional dependencies: `[s3]`, `[gdrive]`, `[gs]`, `[azure]`, `[ssh]`, -`[hdfs]`, `[webdav]`, `[oss]`. Use `[all]` to include them all. +install optional dependencies: `[s3]`, `[gdrive]`, `[gs]`, `[azure]`, `[ssh]`, `[hdfs]`, +`[webdav]`, `[oss]`. Use `[all]` to include them all. [remote storage]: /doc/user-guide/data-management/remote-storage @@ -91,8 +91,8 @@ $ mamba install -c conda-forge dvc ``` Depending on the type of the [remote storage] you plan to use, you might need to -install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, -`dvc-oss`, `dvc-ssh`. +install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, `dvc-oss`, +`dvc-ssh`.
diff --git a/content/docs/install/windows.md b/content/docs/install/windows.md index e44c469f51..3f283c9557 100644 --- a/content/docs/install/windows.md +++ b/content/docs/install/windows.md @@ -43,8 +43,8 @@ $ mamba install -c conda-forge dvc ``` Depending on the type of the [remote storage] you plan to use, you might need to -install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, -`dvc-oss`, `dvc-ssh`. +install optional dependencies: `dvc-s3`, `dvc-azure`, `dvc-gdrive`, `dvc-gs`, `dvc-oss`, +`dvc-ssh`. [remote storage]: /doc/user-guide/data-management/remote-storage @@ -84,8 +84,8 @@ $ pip install dvc ``` Depending on the type of the [remote storage] you plan to use, you might need to -install optional dependencies: `[s3]`, `[azure]`, `[gdrive]`, `[gs]`, `[oss]`, -`[ssh]`. Use `[all]` to include them all. +install optional dependencies: `[s3]`, `[azure]`, `[gdrive]`, `[gs]`, `[oss]`, `[ssh]`. +Use `[all]` to include them all.
diff --git a/content/docs/start/data-pipelines/data-pipelines.md b/content/docs/start/data-pipelines/data-pipelines.md index 850416d1d6..43487f66ad 100644 --- a/content/docs/start/data-pipelines/data-pipelines.md +++ b/content/docs/start/data-pipelines/data-pipelines.md @@ -288,8 +288,8 @@ reproduction's results. ### 💡 Expand to get a peek under the hood -`dvc repro` relies on the [dependency graph] of stages defined in `dvc.yaml`, -and uses `dvc.lock` to determine what exactly needs to be run. +`dvc repro` relies on the [dependency graph] of stages defined in `dvc.yaml`, and +uses `dvc.lock` to determine what exactly needs to be run. The `dvc.lock` file is similar to a `.dvc` file — it captures hashes (in most cases `md5`s) of the dependencies and values of the parameters that were used. diff --git a/content/docs/start/data-pipelines/index.md b/content/docs/start/data-pipelines/index.md index ddc800d2d0..74959c9b0d 100644 --- a/content/docs/start/data-pipelines/index.md +++ b/content/docs/start/data-pipelines/index.md @@ -9,12 +9,12 @@ description: ## Chapters -- **[Data pipelines]** - Use pipelines to describe how models and other data - artifacts are built, and provide an efficient way to reproduce them. Think - "Makefiles for data and ML projects" done right. +- **[Data pipelines]** - Use pipelines to describe how models and other data artifacts + are built, and provide an efficient way to reproduce them. Think "Makefiles for + data and ML projects" done right. -- **[Metrics, parameters, and plots]** - These are first class citizens in DVC - pipelines. Capture, evaluate, and visualize ML projects without leaving Git. +- **[Metrics, parameters, and plots]** - These are first class citizens in DVC pipelines. + Capture, evaluate, and visualize ML projects without leaving Git. 
[data pipelines]: /doc/start/data-management/data-pipelines [metrics, parameters, and plots]: diff --git a/content/docs/start/experiments/index.md b/content/docs/start/experiments/index.md index 0e9130a4c9..d5737e447c 100644 --- a/content/docs/start/experiments/index.md +++ b/content/docs/start/experiments/index.md @@ -11,9 +11,9 @@ description: ## Chapters -- **[Experiment tracking]** - Instrument your code to quickly start tracking - experiments. Manage changes to code, data, metrics, parameters and plots - associated with each experiment without bloating your Git repo. +- **[Experiment tracking]** - Instrument your code to quickly start tracking experiments. + Manage changes to code, data, metrics, parameters and plots associated with each + experiment without bloating your Git repo. - **[Experimenting using pipelines]** - Leverage DVC data pipelines as an experiment management system. Split your workflow into stages, track diff --git a/content/docs/start/index.md b/content/docs/start/index.md index b90e433885..d601eebd8b 100644 --- a/content/docs/start/index.md +++ b/content/docs/start/index.md @@ -91,8 +91,7 @@ $ dvc get https://github.com/iterative/dataset-registry \ We used `dvc get` above to show how DVC can turn any Git repo into a "[data -registry]". `dvc get` can download any file or directory tracked in a DVC -repository. +registry]". `dvc get` can download any file or directory tracked in a DVC repository. [data registry]: /doc/use-cases/data-registry @@ -179,8 +178,8 @@ $ dvc remote add -d myremote %TEMP%\dvcstore -DVC supports many remote [storage types], including Amazon S3, NFS, SSH, Google -Drive, Azure Blob Storage, and HDFS. +DVC supports many remote [storage types], including Amazon S3, NFS, SSH, Google Drive, +Azure Blob Storage, and HDFS. An example for a common use case is configuring an [Amazon S3] remote: @@ -364,12 +363,12 @@ Choose a trail to jump into its first chapter: pipelines. 
- **[Experiment Management]** - Easily track your experiments and their progress - by only instrumenting your code, and collaborate on ML experiments like - software engineers do for code. + by only instrumenting your code, and collaborate on ML experiments like software + engineers do for code. -- **[Model Registry]** - Use the DVC model registry to manage the lifecycle of - your models in an auditable way. Easily access your models and integrate your - model registry actions into CICD pipelines to follow GitOps best practices. +- **[Model Registry]** - Use the DVC model registry to manage the lifecycle of your + models in an auditable way. Easily access your models and integrate your model + registry actions into CICD pipelines to follow GitOps best practices. [Data Pipelines]: /doc/start/data-management/data-pipelines [Experiment Management]: /doc/start/experiments/experiment-tracking diff --git a/content/docs/start/model-registry/index.md b/content/docs/start/model-registry/index.md index d5e81c5ca4..f6ea5c89cd 100644 --- a/content/docs/start/model-registry/index.md +++ b/content/docs/start/model-registry/index.md @@ -14,13 +14,12 @@ https://www.youtube.com/watch?v=T7MBFpnSr9Q - **[Add a model]** - Start tracking model artifacts with DVC. -- **[Manage models in a central registry]** - Manage models, their versions and - lifecycle stages in a git-based model registry. +- **[Manage models in a central registry]** - Manage models, their versions and lifecycle + stages in a git-based model registry. -- **[Use and deploy models]** - Easily download your models from the model - registry. Set up your CICD pipelines to be trigger by model registry actions - (such as assigning model stages) and deploy models directly form the model - registry. +- **[Use and deploy models]** - Easily download your models from the model registry. 
+ Set up your CICD pipelines to be triggered by model registry actions (such as assigning + model stages) and deploy models directly from the model registry. [Add a model]: /doc/start/model-registry/add-model [Manage models in a central registry]: /doc/start/model-registry/manage-models diff --git a/content/docs/studio/rest-api/create-project.md b/content/docs/studio/rest-api/create-project.md index 1e7c82a58e..35c1b90093 100644 --- a/content/docs/studio/rest-api/create-project.md +++ b/content/docs/studio/rest-api/create-project.md @@ -12,8 +12,8 @@ Content-Type: application/json ## Authentication -The request should contain following header containing [DVC Studio client access -token] with `PROJECT` scope for authorization. +The request should contain the following header containing [DVC Studio client +access token] with `PROJECT` scope for authorization. | header | desc | example value | | ------------- | --------------- | ----------------------- | diff --git a/content/docs/studio/user-guide/account-management/index.md b/content/docs/studio/user-guide/account-management/index.md index ebcb533fad..b7cd7e7ec8 100644 --- a/content/docs/studio/user-guide/account-management/index.md +++ b/content/docs/studio/user-guide/account-management/index.md @@ -152,8 +152,8 @@ granting full access to your Studio account. The available scopes are: -- `Experiment operations` - DVC uses this scope to share [live experiments] and - to notify [Studio](https://studio.iterative.ai/) about [pushed experiments]. +- `Experiment operations` - DVC uses this scope to share [live experiments] and to + notify [Studio](https://studio.iterative.ai/) about [pushed experiments]. - `Dataset operations` - [Coming soon](https://cloud.dvc.ai). - `Model registry operations` - like downloading model using `dvc artifacts get`. 
diff --git a/content/docs/studio/user-guide/experiments/live-metrics-and-plots.md b/content/docs/studio/user-guide/experiments/live-metrics-and-plots.md index b7bd790e75..6539b54d1d 100644 --- a/content/docs/studio/user-guide/experiments/live-metrics-and-plots.md +++ b/content/docs/studio/user-guide/experiments/live-metrics-and-plots.md @@ -44,9 +44,9 @@ steps: If the code is running outside of your Git repository (for example, in -[Databricks] or [SageMaker jobs]), you lose the benefit of automatically -tracking metrics and plots with Git, but you can send live updates to Studio if -you set the `DVC_STUDIO_TOKEN` and `DVC_EXP_GIT_REMOTE` environment variables: +[Databricks] or [SageMaker jobs]), you lose the benefit of automatically tracking +metrics and plots with Git, but you can send live updates to Studio if you set the +`DVC_STUDIO_TOKEN` and `DVC_EXP_GIT_REMOTE` environment variables: ```cli $ export DVC_STUDIO_TOKEN="" @@ -64,9 +64,9 @@ $ export DVC_EXP_GIT_REMOTE="https://github.com//" In the training job (which has been configured as detailed above), whenever you log your metrics or plots using [DVCLive], they will be automatically sent to -DVC Studio. See [DVC config] for how to enable/disable live experiment updates -and how to configure a different DVC Studio URL or Git repository. Here is an -example of how you can use [DVCLive] in your training code: +DVC Studio. See [DVC config] for how to enable/disable live experiment updates and +how to configure a different DVC Studio URL or Git repository. 
Here is an example +of how you can use [DVCLive] in your training code: ```py from dvclive import Live diff --git a/content/docs/studio/user-guide/experiments/visualize-and-compare.md b/content/docs/studio/user-guide/experiments/visualize-and-compare.md index 26c7d8a843..c7694c1ba0 100644 --- a/content/docs/studio/user-guide/experiments/visualize-and-compare.md +++ b/content/docs/studio/user-guide/experiments/visualize-and-compare.md @@ -12,8 +12,7 @@ plot examples are AUC curves, loss functions, and confusion matrices. The easiest way to start is with [DVCLive], which will automatically generate plots data and configure them to be visualized. -DVC Studio supports all [DVC plots], which can plot two types of files in your -repository: +DVC Studio supports all [DVC plots], which can plot two types of files in your repository: 1. Data series files, which can be JSON, YAML, CSV or TSV. Data from these files will populate your AUC curves, loss functions, confusion matrices and other diff --git a/content/docs/studio/user-guide/model-registry/use-models.md b/content/docs/studio/user-guide/model-registry/use-models.md index f6001e8c6e..d5497d443e 100644 --- a/content/docs/studio/user-guide/model-registry/use-models.md +++ b/content/docs/studio/user-guide/model-registry/use-models.md @@ -18,10 +18,10 @@ Prerequisites: - Access to your [DVC Studio client access token] with Model registry operations scope. -Without these prerequisites, you can still [download a model artifact with DVC]. -However, it can be easier to use the DVC Studio API since you only need to have -the Studio access token. You do not need direct access to your remote storage or -Git repository, and you do not need to install DVC. +Without these prerequisites, you can still [download a model artifact with +DVC]. However, it can be easier to use the DVC Studio API since you only need to +have the Studio access token. 
You do not need direct access to your remote storage +or Git repository, and you do not need to install DVC. [remote]: /doc/user-guide/data-management/remote-storage [remote storage credentials]: diff --git a/content/docs/studio/user-guide/model-registry/view-and-compare-models.md b/content/docs/studio/user-guide/model-registry/view-and-compare-models.md index 0755ad7b5f..56c9476c7e 100644 --- a/content/docs/studio/user-guide/model-registry/view-and-compare-models.md +++ b/content/docs/studio/user-guide/model-registry/view-and-compare-models.md @@ -19,8 +19,8 @@ framework, repository, etc. DVC Studio consolidates the stages of all the models in the registry, and provides a way to filter models by stages. -You can take a look at the [models dashboard] in Iterative's public (read only) -model registry. +You can take a look at the [models dashboard] in Iterative's public (read only) model +registry. ## Model details page: diff --git a/content/docs/use-cases/ci-cd-for-machine-learning.md b/content/docs/use-cases/ci-cd-for-machine-learning.md index 4c85b9a736..24de076832 100644 --- a/content/docs/use-cases/ci-cd-for-machine-learning.md +++ b/content/docs/use-cases/ci-cd-for-machine-learning.md @@ -52,9 +52,9 @@ configuration. Here are a few feature highlights: **Models, Data, and Metrics as Code**: DVC removes the need to create versioning databases, use special file/folder structures, or write bespoke interfacing code. Instead, DVC stores meta-information in Git ("codifying" data and ML -models) while pushing the actual data content to [cloud storage]. DVC also -provides metrics-driven navigation in Git repositories -- [tabulating and -plotting] model metrics changes across commits. +models) while pushing the actual data content to [cloud storage]. DVC also provides +metrics-driven navigation in Git repositories -- [tabulating and plotting] model +metrics changes across commits. 
[cloud storage]: /doc/user-guide/data-management/remote-storage [tabulating and plotting]: /doc/start/data-management/metrics-parameters-plots diff --git a/content/docs/use-cases/data-registry/index.md b/content/docs/use-cases/data-registry/index.md index f616d9b383..1b06ac2ad3 100644 --- a/content/docs/use-cases/data-registry/index.md +++ b/content/docs/use-cases/data-registry/index.md @@ -28,16 +28,15 @@ cloud storage. Advantages: - **Data as code**: Leverage Git workflow benefits such as having a commit history, branching, pull requests, reviews, and even [CI/CD for your data and models lifecycle]. Think "Git for cloud storage". -- **Security**: DVC-controlled [remote storage] (e.g. Amazon S3) can be - configured to limit data access. For example, you can setup read-only - endpoints (e.g. an HTTP server) to prevent data deletions or alterations. +- **Security**: DVC-controlled [remote storage] (e.g. Amazon S3) can be configured + to limit data access. For example, you can setup read-only endpoints (e.g. an HTTP + server) to prevent data deletions or alterations. [ci/cd for your data and models lifecycle]: /doc/use-cases/ci-cd-for-machine-learning [remote storage]: /doc/user-guide/data-management/remote-storage -👩‍💻 Intrigued? Try our [registry tutorial] to learn how DVC looks and feels -firsthand. +👩‍💻 Intrigued? Try our [registry tutorial] to learn how DVC looks and feels firsthand. 
[registry tutorial]: /doc/use-cases/data-registry/tutorial diff --git a/content/docs/use-cases/data-registry/tutorial.md b/content/docs/use-cases/data-registry/tutorial.md index 5295cfd866..b0ff0982b0 100644 --- a/content/docs/use-cases/data-registry/tutorial.md +++ b/content/docs/use-cases/data-registry/tutorial.md @@ -29,8 +29,8 @@ $ git commit -m "Track 1.8 GB 10,000 song dataset in music/" ``` The actual data is stored in the project's cache, and can be -[pushed](/doc/command-reference/push) to one or more [remote storage] locations -so the registry can be accessed from other locations and by other people: +[pushed](/doc/command-reference/push) to one or more [remote storage] locations so +the registry can be accessed from other locations and by other people: ```cli $ dvc remote add -d myremote s3://mybucket/dvcstore @@ -174,8 +174,8 @@ $ tree --filelimit=10 ... ``` -And let's not forget to `dvc push` data changes to the [remote storage], so -others can obtain them! +And let's not forget to `dvc push` data changes to the [remote storage], so others +can obtain them! ```cli $ dvc push @@ -187,8 +187,7 @@ Now you know how to to build a lightweight data registry, update it, and get files from it. As your registry or team continues to grow, you may have trouble managing all artifacts across multiple projects. How do you keep them organized, or know which version to use, or share them with others outside your team? DVC -along with [DVC Studio] can help you scale your registry and address these -questions. +along with [DVC Studio] can help you scale your registry and address these questions. ### Adding metadata @@ -209,18 +208,18 @@ artifacts: ``` Once you `git commit` and `git push` this info to a project that's connected to -[DVC Studio], anyone on your team can see it and filter or search across all -your projects in the model registry. 
Although the artifact above is -not a model, you can change the filters to use it for any type of artifact: +[DVC Studio], anyone on your team can see it and filter or search across all your +projects in the model registry. Although the artifact above is not a +model, you can change the filters to use it for any type of artifact: ![Show Registry Datasets](https://static.iterative.ai/img/registry-show-datasets.gif) ### Registering versions and assigning stages -[Version numbers] and [stages] signal the commit to use and can trigger -automated workflows. Just like with software, you can use [semantic versioning] -to tag releases of your artifacts and to mark artifact versions as in -production, development, or other stages of their lifecycle: +[Version numbers] and [stages] signal the commit to use and can trigger automated +workflows. Just like with software, you can use [semantic versioning] to tag +releases of your artifacts and to mark artifact versions as in production, +development, or other stages of their lifecycle: ![Assign Registry Datasets](https://static.iterative.ai/img/registry-assign-datasets.gif) @@ -230,10 +229,12 @@ actions in your CI/CD workflows when you register a version or assign a stage. ### Accessing artifacts -Others can [download or stream artifacts] by their version or stage without -needing access to your Git repository or cloud storage. If you connect your -[cloud credentials] in [DVC Studio], anyone on your team can access that -artifact using only a [Studio token], either in the UI or programmatically: +Others can [download or stream artifacts] by their version or stage without needing +access to your Git repository or cloud storage. 
If you connect your [cloud +credentials] +in [DVC Studio], anyone on your team can access that artifact using only a [Studio +token], +either in the UI or programmatically: ![Download Registry Datasets](https://static.iterative.ai/img/registry-download-datasets.gif) diff --git a/content/docs/use-cases/experiment-tracking.md b/content/docs/use-cases/experiment-tracking.md index e47c9ff7fe..704d74a939 100644 --- a/content/docs/use-cases/experiment-tracking.md +++ b/content/docs/use-cases/experiment-tracking.md @@ -11,9 +11,9 @@ resume a line of work. (usually with notebooks or speadsheets)_ DVC provides a layer of [experiment management] features out-of-the-box (no need -for special servers or websites). Running **DVC Experiments** in your workspace -captures relevant changesets automatically (input data, source code, -hyperparameters, artifacts, etc.). +for special servers or websites). Running **DVC Experiments** in your workspace captures +relevant changesets automatically (input data, source code, hyperparameters, artifacts, +etc.). Other tools tend to focus on experiment navigation by saving metrics and artifacts that result from your experiments, along with fragile links to code @@ -29,8 +29,8 @@ instead, and not as a separate system. [VS Code extension][ide] (shown) or [DVC Studio]._ When you are ready to share, [DVC Studio] can be the central hub for your team's -projects, experiments, and models. DVC Studio also gives you the power to run -experiments in the cloud. +projects, experiments, and models. DVC Studio also gives you the power to run experiments +in the cloud. 
Major benefits of tracking experiments with DVC: diff --git a/content/docs/use-cases/fast-data-caching-hub.md b/content/docs/use-cases/fast-data-caching-hub.md index 5af11eb0b8..a599ac5d72 100644 --- a/content/docs/use-cases/fast-data-caching-hub.md +++ b/content/docs/use-cases/fast-data-caching-hub.md @@ -27,9 +27,9 @@ to ![](/img/storage-layers.png) _Data storage middleware for multiple projects_ -You can have a single storage for all you projects by setting up a [shared DVC -cache] in a near location (network, external drive, etc.). This de-duplicates -files across datasets and prevents repetitive transfers by +You can have a single storage for all your projects by setting up a [shared +DVC cache] in a near location (network, external drive, etc.). This +de-duplicates files across datasets and prevents repetitive transfers by [linking](/doc/user-guide/data-management/large-dataset-optimization) your working files and directories. Data security policies can be implemented reliably, as data never leaves the central storage. DVC can also help you back @@ -45,8 +45,8 @@ without having to change the directory structures or code of your projects. ### What's next? -For details about how DVC caches your files and directories, see [Structure of -the cache directory]. If you're completely new to DVC, see our +For details about how DVC caches your files and directories, see [Structure +of the cache directory]. If you're completely new to DVC, see our [Get Started](/doc/start) pages to get familiar with the main features that structured storage and [data versioning](/doc/use-cases/versioning-data-and-models) allow. And check diff --git a/content/docs/use-cases/model-registry.md b/content/docs/use-cases/model-registry.md index e8bfd940c7..7e7a053692 100644 --- a/content/docs/use-cases/model-registry.md +++ b/content/docs/use-cases/model-registry.md @@ -2,9 +2,9 @@ A **model registry** is a tool to catalog ML models and their versions. 
Models from your data science projects can be discovered, tested, shared, deployed, and -audited from there. [DVC Studio] model registry enables these capabilities [on -top of Git][gitops], so you can stick to an existing software engineering stack. -No more division between ML engineering and operations! +audited from there. [DVC Studio] model registry enables these capabilities [on top +of Git][gitops], so you can stick to an existing software engineering stack. No more +division between ML engineering and operations! ![](/img/ml_model_registry.jpg) _MLOps from modeling to production_ diff --git a/content/docs/use-cases/versioning-data-and-models/index.md b/content/docs/use-cases/versioning-data-and-models/index.md index 333ee91655..7264179fd7 100644 --- a/content/docs/use-cases/versioning-data-and-models/index.md +++ b/content/docs/use-cases/versioning-data-and-models/index.md @@ -88,7 +88,8 @@ enforce them. And this is just the beginning. DVC supports multiple advanced features out-of-the-box: Build, run, and versioning [data pipelines], [manage -experiments] effectively, and more. +experiments] +effectively, and more. [data pipelines]: /doc/command-reference/dag [manage experiments]: /doc/start/experiments diff --git a/content/docs/use-cases/versioning-data-and-models/tutorial.md b/content/docs/use-cases/versioning-data-and-models/tutorial.md index 58dc3e1ee3..eb9bc90ab0 100644 --- a/content/docs/use-cases/versioning-data-and-models/tutorial.md +++ b/content/docs/use-cases/versioning-data-and-models/tutorial.md @@ -357,8 +357,8 @@ Another detail we only brushed upon here is the way we captured the `metrics.csv` metrics file with the `-M` option of `dvc stage add`. Marking this output as a metric enables us to compare its values across Git tags or branches (for example, representing different experiments). See -`dvc metrics`, [Comparing Changes], and [Comparing Many Experiments] to learn -more about managing metrics with DVC. 
+`dvc metrics`, [Comparing Changes], and [Comparing Many Experiments] to learn more +about managing metrics with DVC. [comparing changes]: /doc/start/data-management/metrics-parameters-plots#comparing-iterations diff --git a/content/docs/user-guide/basic-concepts/dvc-project.md b/content/docs/user-guide/basic-concepts/dvc-project.md index e0a923fc08..f813f6e350 100644 --- a/content/docs/user-guide/basic-concepts/dvc-project.md +++ b/content/docs/user-guide/basic-concepts/dvc-project.md @@ -15,10 +15,10 @@ considered part of the project (e.g. ## DVC repository A DVC project in a Git repository can also be called a _DVC repository_ or "the -repo". This setup enables the [versioning features] of DVC (recommended). Files -tracked by Git are considered part of the DVC project when referenced from DVC -metafiles such as `dvc.lock`; for example source code that is used as a -stage command (`cmd` field in `dvc.yaml`). +repo". This setup enables the [versioning features] of DVC (recommended). Files tracked +by Git are considered part of the DVC project when referenced from DVC metafiles +such as `dvc.lock`; for example source code that is used as a stage +command (`cmd` field in `dvc.yaml`). [versioning features]: /doc/start/data-management/data-versioning diff --git a/content/docs/user-guide/basic-concepts/workspace.md b/content/docs/user-guide/basic-concepts/workspace.md index 7b7ab3b780..21cbacbc0f 100644 --- a/content/docs/user-guide/basic-concepts/workspace.md +++ b/content/docs/user-guide/basic-concepts/workspace.md @@ -7,11 +7,10 @@ Adding versioning needs and dependency management can easily turn this near impossible. A DVC project structure is simplified by encapsulating [data -versioning] and [pipelining] (e.g. machine learning workflows), among other -features. This leaves a _workspace_ directory with a clean view of your working -raw data, source code, data artifacts, etc. and a few -[metafiles](/doc/user-guide/project-structure) that enable these features. 
A -single version of the project is visible at a time. +versioning] and [pipelining] (e.g. machine learning workflows), among other features. +This leaves a _workspace_ directory with a clean view of your working raw data, source +code, data artifacts, etc. and a few [metafiles](/doc/user-guide/project-structure) +that enable these features. A single version of the project is visible at a time. [data versioning]: /doc/start/data-management/data-versioning [pipelining]: /doc/start/data-management/data-pipelines diff --git a/content/docs/user-guide/data-management/cloud-versioning.md b/content/docs/user-guide/data-management/cloud-versioning.md index e9bf6cb667..1185f7ba41 100644 --- a/content/docs/user-guide/data-management/cloud-versioning.md +++ b/content/docs/user-guide/data-management/cloud-versioning.md @@ -12,8 +12,8 @@ Cloud versioning features are only avaible for certain storage providers. Currently, it is supported on the following storage types: - [Amazon S3] (requires [S3 Versioning] enabled buckets) -- Microsoft [Azure Blob Storage] (requires [Blob versioning] enabled storage - accounts and containers) +- Microsoft [Azure Blob Storage] (requires [Blob versioning] enabled storage accounts + and containers) - [Google Cloud Storage] (requires [Object versioning] enabled buckets) [amazon s3]: /doc/user-guide/data-management/remote-storage/amazon-s3 diff --git a/content/docs/user-guide/data-management/discovering-and-accessing-data.md b/content/docs/user-guide/data-management/discovering-and-accessing-data.md index 3ebc66d3c0..1e19e91a0e 100644 --- a/content/docs/user-guide/data-management/discovering-and-accessing-data.md +++ b/content/docs/user-guide/data-management/discovering-and-accessing-data.md @@ -10,9 +10,8 @@ projects? These questions tend to come up when you browse the files that DVC saves to -[remote storage] (e.g. 
-`s3://dvc-public/remote/get-started/fb/89904ef053f04d64eafcc3d70db673` 😱 -instead of the original file name such as `model.pkl` or `data.xml`). +[remote storage] (e.g. `s3://dvc-public/remote/get-started/fb/89904ef053f04d64eafcc3d70db673` +😱 instead of the original file name such as `model.pkl` or `data.xml`). [remote storage]: /doc/user-guide/data-management/remote-storage @@ -86,9 +85,8 @@ bring in changes from the data source later using `dvc update`. -The [dataset registry] repository doesn't actually contain a -`get-started/data.xml` file. Like `dvc get`, `dvc import` downloads from [remote -storage]. +The [dataset registry] repository doesn't actually contain a `get-started/data.xml` +file. Like `dvc get`, `dvc import` downloads from [remote storage]. [dataset registry]: https://github.com/iterative/dataset-registry @@ -142,9 +140,9 @@ path. -The [DVC Studio] model registry was built for models but since DVC tracks all -kinds of files, it can be used just as easily for other artifact types. See our -[tutorial] for how to manage artifacts using the registry. +The [DVC Studio] model registry was built for models but since DVC tracks all kinds +of files, it can be used just as easily for other artifact types. See our [tutorial] +for how to manage artifacts using the registry. @@ -153,9 +151,9 @@ semantic versions can be registered and lifecycle stages (think `dev`/`test`/`prod`) can be assigned using Git tags managed by [GTO](/doc/gto). These tags give you a full history of your model lifecycle in Git and enable you to trigger CICD workflows based on changes in the model registry. With [DVC -Studio], you can see models and their metadata across all projects, and you can -download artifacts by name, version, and lifecycle stage, without needing to -configure access to the underlying Git repository or remote storage. 
+Studio], you can see models and their metadata across all projects, and you can download +artifacts by name, version, and lifecycle stage, without needing to configure access +to the underlying Git repository or remote storage. [DVC Studio]: https://studio.iterative.ai [tutorial]: /doc/use-cases/data-registry/tutorial#sharing-and-managing-artifacts diff --git a/content/docs/user-guide/data-management/importing-external-data.md b/content/docs/user-guide/data-management/importing-external-data.md index 960358a134..78fb4db582 100644 --- a/content/docs/user-guide/data-management/importing-external-data.md +++ b/content/docs/user-guide/data-management/importing-external-data.md @@ -2,8 +2,8 @@ To version data that lives outside of your local project, you can import it. You can choose whether to download that data and whether to push -copies to your [DVC remote]. This makes importing the data useful even if you -want to track the data in-place at its original source location. +copies to your [DVC remote]. This makes importing the data useful even if you want +to track the data in-place at its original source location. @@ -57,8 +57,8 @@ $ dvc update data.xml.dvc ``` During `dvc push`, DVC will upload the version of the data tracked by -`data.xml.dvc` to the [DVC remote] so that it is backed up in case you need to -recover it. +`data.xml.dvc` to the [DVC remote] so that it is backed up in case you need to recover +it. DVC will never overwrite the source location of the data. Instead, DVC can checkout any version of that data locally. DVC is designed to protect the @@ -108,12 +108,11 @@ Everything is up to date. ### Example: Cloud versioning -If you are importing from a supported [cloud versioning] provider, -`dvc import-url --no-download --version-aware` will not download the data -locally but will track the cloud provider's version IDs for the data. `dvc pull` -will try to download those version IDs as long as they are available. 
`dvc push` -will not upload anything because DVC assumes the versions are available at the -source location: +If you are importing from a supported [cloud versioning] provider, `dvc import-url --no-download --version-aware` +will not download the data locally but will track the cloud provider's version IDs +for the data. `dvc pull` will try to download those version IDs as long as they are +available. `dvc push` will not upload anything because DVC assumes the versions are +available at the source location: ```cli $ dvc import-url --no-download --version-aware s3://myversionedbucket/data.xml diff --git a/content/docs/user-guide/data-management/modifying-large-datasets.md b/content/docs/user-guide/data-management/modifying-large-datasets.md index 6b29b4aea3..d812ee0630 100644 --- a/content/docs/user-guide/data-management/modifying-large-datasets.md +++ b/content/docs/user-guide/data-management/modifying-large-datasets.md @@ -110,9 +110,9 @@ it is. ## Modifying remote datasets -If your dataset is in [remote storage] but not downloaded to your workspace, -it's inconvenient to `dvc pull` the entire dataset to update only one or a few -files. Instead, you can pull only the files you want to update: +If your dataset is in [remote storage] but not downloaded to your workspace, it's +inconvenient to `dvc pull` the entire dataset to update only one or a few files. +Instead, you can pull only the files you want to update: ```cli $ tree diff --git a/content/docs/user-guide/data-management/remote-storage/amazon-s3.md b/content/docs/user-guide/data-management/remote-storage/amazon-s3.md index 3c341f3b5a..7bfae5fed3 100644 --- a/content/docs/user-guide/data-management/remote-storage/amazon-s3.md +++ b/content/docs/user-guide/data-management/remote-storage/amazon-s3.md @@ -145,8 +145,8 @@ See `dvc remote modify` for more command usage details. ssl_verify 'path/to/ca_bundle.pem' ``` -- `sse` (`AES256` or `aws:kms`) - [server-side encryption] algorithm to use. 
- None by default +- `sse` (`AES256` or `aws:kms`) - [server-side encryption] algorithm to use. None + by default ```cli $ dvc remote modify myremote sse 'AES256' diff --git a/content/docs/user-guide/data-management/remote-storage/azure-blob-storage.md b/content/docs/user-guide/data-management/remote-storage/azure-blob-storage.md index 073160014a..000d026f43 100644 --- a/content/docs/user-guide/data-management/remote-storage/azure-blob-storage.md +++ b/content/docs/user-guide/data-management/remote-storage/azure-blob-storage.md @@ -36,9 +36,8 @@ account. A storage account name (`account_name`) is always needed. DVC tries to -authenticate with its [default credential] by default. This uses environment -variables (usually set during [Azure CLI configuration]) or data from certain -Microsoft applications. +authenticate with its [default credential] by default. This uses environment variables +(usually set during [Azure CLI configuration]) or data from certain Microsoft applications. ```cli $ dvc remote modify myremote account_name 'mystorage' @@ -82,8 +81,8 @@ order). The following params are listed in the order in which they are tried. -- A [connection string] (`connection_string`) is used if given (recommended) - (`account_name` is ignored since it's included in the connection string). +- A [connection string] (`connection_string`) is used if given (recommended) (`account_name` + is ignored since it's included in the connection string). ```cli $ dvc remote modify --local myremote \ @@ -202,16 +201,16 @@ See `dvc remote modify` for more command usage details. Default is `false`. - `exclude_visual_studio_code_credential` - If `true`, excludes Visual Studio - Code credential source for Azure Remote. See [Azure credentials - documentation]. Default is `false`. + Code credential source for Azure Remote. See [Azure + credentials documentation]. Default is `false`. 
- `exclude_shared_token_cache_credential` - If `true`, excludes the shared token - cache credential source for Azure Remote. See [Azure credentials - documentation]. Default is `false`. + cache credential source for Azure Remote. See [Azure + credentials documentation]. Default is `false`. - `exclude_managed_identity_credential` - If `true`, excludes the managed - identity credential source for Azure Remote. See [Azure credentials - documentation]. Default is `false`. + identity credential source for Azure Remote. See [Azure + credentials documentation]. Default is `false`. [Azure credentials documentation]: https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python diff --git a/content/docs/user-guide/data-management/remote-storage/google-cloud-storage.md b/content/docs/user-guide/data-management/remote-storage/google-cloud-storage.md index 77a665fcbc..1ec4617c5a 100644 --- a/content/docs/user-guide/data-management/remote-storage/google-cloud-storage.md +++ b/content/docs/user-guide/data-management/remote-storage/google-cloud-storage.md @@ -19,8 +19,8 @@ Upon `dvc push` (or when needed), DVC will try to authenticate using your -Make sure to run [gcloud auth application-default login] unless you use a -service account or other ways to authenticate ([more info]). +Make sure to run [gcloud auth application-default login] unless you use a service +account or other ways to authenticate ([more info]). @@ -38,9 +38,8 @@ remote, set any supported config param with `dvc remote modify`. 
## Custom authentication -For [service accounts] (a Google account associated to your GCP project instead -of a user), you can set the path to the file that contains a [service account -key]: +For [service accounts] (a Google account associated to your GCP project instead of +a user), you can set the path to the file that contains a [service account key]: [service accounts]: https://cloud.google.com/iam/docs/service-accounts [service account key]: diff --git a/content/docs/user-guide/data-management/remote-storage/google-drive.md b/content/docs/user-guide/data-management/remote-storage/google-drive.md index ef06fe353f..6155011cbe 100644 --- a/content/docs/user-guide/data-management/remote-storage/google-drive.md +++ b/content/docs/user-guide/data-management/remote-storage/google-drive.md @@ -197,12 +197,11 @@ On the first usage of a GDrive remote, for example when trying to `dvc push` tracked data for the first time, DVC will prompt you to visit a special Google authentication web page. There you'll need to sign into a Google account with the needed access to the GDrive [URL](#url-format) in question. The [auth -process] will ask you to grant DVC the necessary permissions, and produce a -verification code needed for DVC to complete the connection. On success, the -necessary credentials will be cached globally, for example in -`~/Library/Caches/pydrive2fs/{gdrive_client_id}/default.json` for macOS ([see -`gdrive_user_credentials_file`]), and used automatically next time DVC needs -them. +process] will ask you to grant DVC the necessary permissions, and produce a verification +code needed for DVC to complete the connection. On success, the necessary credentials +will be cached globally, for example in `~/Library/Caches/pydrive2fs/{gdrive_client_id}/default.json` +for macOS ([see `gdrive_user_credentials_file`]), and used automatically next +time DVC needs them. 
[auth process]: https://developers.google.com/drive/api/v2/about-auth [see `gdrive_user_credentials_file`]: #configuration-parameters diff --git a/content/docs/user-guide/data-management/remote-storage/hdfs.md b/content/docs/user-guide/data-management/remote-storage/hdfs.md index d4f1662a41..5c6a68ba62 100644 --- a/content/docs/user-guide/data-management/remote-storage/hdfs.md +++ b/content/docs/user-guide/data-management/remote-storage/hdfs.md @@ -106,8 +106,8 @@ them with the `--local` option, so they're written to a Git-ignored config file. $ dvc remote modify myremote kerberos true ``` -- `kerberos_principal` - [Kerberos principal] to use, in case you have multiple - ones (for example service accounts). Only used if `kerberos` is `true`. +- `kerberos_principal` - [Kerberos principal] to use, in case you have multiple ones + (for example service accounts). Only used if `kerberos` is `true`. ```cli $ dvc remote modify myremote kerberos_principal myprincipal diff --git a/content/docs/user-guide/data-management/remote-storage/index.md b/content/docs/user-guide/data-management/remote-storage/index.md index 052c2482cf..fe6ef0e867 100644 --- a/content/docs/user-guide/data-management/remote-storage/index.md +++ b/content/docs/user-guide/data-management/remote-storage/index.md @@ -8,8 +8,8 @@ regenerate them locally. See also `dvc push` and `dvc pull`. -DVC remotes are similar to [Git remotes] (e.g. GitHub or GitLab hosting), but -for cached data instead of code. +DVC remotes are similar to [Git remotes] (e.g. GitHub or GitLab hosting), but for +cached data instead of code. 
[git remotes]: https://git-scm.com/book/en/v2/Git-Basics-Working-with-Remotes diff --git a/content/docs/user-guide/data-management/remote-storage/ssh.md b/content/docs/user-guide/data-management/remote-storage/ssh.md index e64e997e40..457040b028 100644 --- a/content/docs/user-guide/data-management/remote-storage/ssh.md +++ b/content/docs/user-guide/data-management/remote-storage/ssh.md @@ -20,9 +20,9 @@ SSH, like FTP (simple file transfer protocol) which becomes secure or [SFTP].
DVC will act as an SSH/SFTP client, which means that the remote storage should -be located in an [SSH server]. Use `dvc remote add` to define the remote by -setting a name and valid [SSH URL] (which may include some auth info. like user -name or port): +be located in an [SSH server]. Use `dvc remote add` to define the remote by setting +a name and valid [SSH URL] (which may include some auth info. like user name or +port): ```cli $ dvc remote add -d myremote ssh://user@example.com:2222/path @@ -99,9 +99,9 @@ $ dvc remote modify myremote ask_password true - `url` - modify the remote location ([scroll up](#amazon-s3) for details) -- `allow_agent` - whether to use [SSH agents] (`true` by default). Setting this - to `false` is useful when `ssh-agent` is causing problems, e.g. "No existing - session" errors. +- `allow_agent` - whether to use [SSH agents] (`true` by default). Setting this to + `false` is useful when `ssh-agent` is causing problems, e.g. "No existing session" + errors. - `gss_auth` - use Generic Security Service auth if available on host (for example, [with Kerberos]). `false` by default diff --git a/content/docs/user-guide/env.md b/content/docs/user-guide/env.md index b5e56bd63a..872a5c1646 100644 --- a/content/docs/user-guide/env.md +++ b/content/docs/user-guide/env.md @@ -7,8 +7,8 @@ List of environment variables to configure DVC behavior. - `DVC_EXP_BASELINE_REV`: Git revision for the baseline commit from which an experiment derives. Automatically set by DVC. - `DVC_EXP_GIT_REMOTE`: Git remote name or URL used to [push the experiments] - and [send live metrics and plots] to [DVC Studio]. If not specified, push to - `origin`. Overrides `dvc config exp.git_remote`. + and [send live metrics and plots] to [DVC Studio]. If not specified, push to `origin`. + Overrides `dvc config exp.git_remote`. - `DVC_EXP_NAME`: Name of the experiment. Automatically set by DVC. 
- `DVC_GLOBAL_CONFIG_DIR`: Directory in which DVC will look for global [configuration](/doc/user-guide/project-structure/configuration). @@ -24,10 +24,9 @@ List of environment variables to configure DVC behavior. - `DVC_STUDIO_OFFLINE`: If `true`, disables sharing [live experiments](/doc/studio/user-guide/experiments/live-metrics-and-plots) even if the DVC Studio token is set. Overrides `dvc config studio.offline`. -- `DVC_STUDIO_TOKEN`: Set [DVC Studio] access token to use. Overrides - `dvc config studio.token`. -- `DVC_STUDIO_URL`: Set URL of [DVC Studio] to use (in case of self-hosted DVC - Studio instance). Overrides `dvc config studio.url`. +- `DVC_STUDIO_TOKEN`: Set [DVC Studio] access token to use. Overrides `dvc config studio.token`. +- `DVC_STUDIO_URL`: Set URL of [DVC Studio] to use (in case of self-hosted DVC Studio + instance). Overrides `dvc config studio.url`. - `DVC_SYSTEM_CONFIG_DIR`: Directory in which DVC will look for system [configuration](/doc/user-guide/project-structure/configuration). - `DVC_STAGE`: Contains the stage name for the current DVC stage. Automatically diff --git a/content/docs/user-guide/experiment-management/hydra-composition.md b/content/docs/user-guide/experiment-management/hydra-composition.md index e6a0a6ae60..dbc1feced7 100644 --- a/content/docs/user-guide/experiment-management/hydra-composition.md +++ b/content/docs/user-guide/experiment-management/hydra-composition.md @@ -101,10 +101,9 @@ train: ### Expand to set up a DVC pipeline. -Let's build an [experimental pipeline] with 2 stages. The first one downloads a -dataset and uses the parameters defined in the `dataset` section of -`params.yaml`. The second stage trains an ML model and uses the rest of the -parameters (entire `train` group). +Let's build an [experimental pipeline] with 2 stages. The first one downloads a dataset +and uses the parameters defined in the `dataset` section of `params.yaml`. 
The second +stage trains an ML model and uses the rest of the parameters (entire `train` group). ```yaml stages: @@ -274,13 +273,13 @@ You can run the same code with or without Hydra (or DVC). You can also reuse You can configure how DVC works with Hydra. -By default, DVC will look for Hydra [config groups] in a `conf` directory, but -you can set a different directory using `dvc config hydra.config_dir other_dir`. -This is equivalent to the `config_path` argument in `@hydra.main()`. +By default, DVC will look for Hydra [config groups] in a `conf` directory, but you +can set a different directory using `dvc config hydra.config_dir other_dir`. This +is equivalent to the `config_path` argument in `@hydra.main()`. -Within that directory, DVC will look for [defaults list] in `config.yaml`, but -you can set a different path using `dvc config hydra.config_name other.yaml`. -This is equivalent to the `config_name` argument in `@hydra.main()`. +Within that directory, DVC will look for [defaults list] in `config.yaml`, but you +can set a different path using `dvc config hydra.config_name other.yaml`. This is +equivalent to the `config_name` argument in `@hydra.main()`. Hydra will automatically discover [plugins] in the `hydra_plugins` directory. By default, DVC will look for `hydra_plugins` in the root directory of the DVC diff --git a/content/docs/user-guide/experiment-management/index.md b/content/docs/user-guide/experiment-management/index.md index 57371dddfd..41e5ffe1c4 100644 --- a/content/docs/user-guide/experiment-management/index.md +++ b/content/docs/user-guide/experiment-management/index.md @@ -12,10 +12,9 @@ Git tree. This prevents bloating your repo with temporary commits and branches. ### ⚙️ How does DVC track experiments? -Experiments are custom [Git references] (found in `.git/refs/exps`) with one or -more commits based on `HEAD`. These commits are hidden and not checked out by -DVC. 
Note that these are not pushed to Git remotes by default either (see -`dvc exp push`). +Experiments are custom [Git references] (found in `.git/refs/exps`) with one or more +commits based on `HEAD`. These commits are hidden and not checked out by DVC. Note +that these are not pushed to Git remotes by default either (see `dvc exp push`). Note that DVC Experiments require a unique name to identify them. DVC will auto-generate one by default, such as `puffy-daks`. A custom name can be set diff --git a/content/docs/user-guide/experiment-management/running-experiments.md b/content/docs/user-guide/experiment-management/running-experiments.md index 566d647481..28f4256f70 100644 --- a/content/docs/user-guide/experiment-management/running-experiments.md +++ b/content/docs/user-guide/experiment-management/running-experiments.md @@ -59,10 +59,10 @@ run: -DVC observes the [dependency graph] between stages, so it only runs the ones -with changed dependencies or outputs missing from the cache. You -can limit this to certain [reproduction targets] or even single stages -(`--single-item` flag). +DVC observes the [dependency graph] between stages, so it only runs the ones with +changed dependencies or outputs missing from the cache. You can limit +this to certain [reproduction targets] or even single stages (`--single-item` +flag). DVC projects actually support more than one pipeline, in one or more `dvc.yaml` files. The `--all-pipelines` option lets you run them all at @@ -106,15 +106,15 @@ files cannot be restored. ## Tuning (hyper)parameters Parameters are any values used inside your code to tune modeling attributes, or -that affect experiment results in any other way. For example, a [random forest -classifier] may require a _maximum depth_ value. Machine learning +that affect experiment results in any other way. For example, a [random +forest classifier] may require a _maximum depth_ value. 
Machine learning experimentation often involves defining and searching hyperparameter spaces to improve the resulting model metrics. -Your source code should read params from structured [parameters files] -(`params.yaml` by default). Define them with the `params` field of `dvc.yaml` -for DVC to track them. When a param value has changed, `dvc exp run` invalidates -any stages that depend on it, and reproduces them. +Your source code should read params from structured [parameters files] (`params.yaml` +by default). Define them with the `params` field of `dvc.yaml` for DVC to track them. +When a param value has changed, `dvc exp run` invalidates any stages that depend +on it, and reproduces them. @@ -318,8 +318,7 @@ To clear the experiments queue and start over, use `dvc queue remove --queued`. -For more advanced grid searches, DVC supports complex config via [Hydra -composition]. +For more advanced grid searches, DVC supports complex config via [Hydra composition]. [hydra composition]: /doc/user-guide/experiment-management/hydra-composition diff --git a/content/docs/user-guide/experiment-management/sharing-experiments.md b/content/docs/user-guide/experiment-management/sharing-experiments.md index d9a68d8130..cce00fb45f 100644 --- a/content/docs/user-guide/experiment-management/sharing-experiments.md +++ b/content/docs/user-guide/experiment-management/sharing-experiments.md @@ -1,7 +1,7 @@ # Sharing Experiments -See the video below for how to share experiments using the [DVC Extension] for -VS Code, or keep reading to go deeper. +See the video below for how to share experiments using the [DVC Extension] for VS +Code, or keep reading to go deeper. https://www.youtube.com/watch?v=UMVYjwJtRj0&autoplay=1&mute=1 @@ -33,8 +33,8 @@ when. -`dvc studio login` will set your [access token] to automatically send live -metrics and plots. +`dvc studio login` will set your [access token] to automatically send live metrics +and plots. 
@@ -53,9 +53,10 @@ While the experiment runs, you will see live updates like this in DVC Studio ### Advanced options and troubleshooting for live metrics and plots -See [DVC config] for how to enable/disable live metrics and how to configure a -different DVC Studio URL or Git repository, or see the DVC Studio guide on [live -experiments] for more information on how to setup, view, and compare. +See [DVC config] for how to enable/disable live metrics and how to configure a different +DVC Studio URL or Git repository, or see the DVC Studio guide on [live +experiments] +for more information on how to set up, view, and compare.
@@ -139,8 +140,8 @@ If you don't know your Git remote, check with `git remote -v` or see [troubleshooting] for problems. By default, DVC will also share cached data that is tracked by DVC, -which requires [remote storage] (e.g. Amazon S3 or SSH). Add the `--no-cache` -flag to exclude sharing cached data. +which requires [remote storage] (e.g. Amazon S3 or SSH). Add the `--no-cache` flag +to exclude sharing cached data. By default, `dvc exp push origin` will push all experiments derived from your current Git commit, but you may specify specific experiments as arguments or use @@ -169,9 +170,8 @@ default value, set the configuration option `exp.git_remote` or the ## Find pushed experiments -You can see pushed experiments in [DVC Studio]. From there, you can make an -experiment [persistent] by creating a Git branch, or you can [remove] it from -your Git remote: +You can see pushed experiments in [DVC Studio]. From there, you can make an experiment +[persistent] by creating a Git branch, or you can [remove] it from your Git remote: ![DVC Studio Shared Experiments](/img/studio-shared-exps.png) @@ -255,13 +255,13 @@ $ dvc push ``` If you don't want to create a new Git branch and instead want to commit the -experiment directly on top of your current Git branch, you can [bring experiment -results to your workspace]. +experiment directly on top of your current Git branch, you can [bring +experiment results to your workspace]. ## Remove pushed experiments -As you share more experiments, [DVC Studio] and Git remotes may be become -cluttered with experiment references. +As you share more experiments, [DVC Studio] and Git remotes may become cluttered +with experiment references.
You can remove experiments in DVC Studio: diff --git a/content/docs/user-guide/experiment-management/visualizing-plots.md b/content/docs/user-guide/experiment-management/visualizing-plots.md index 8b861308c6..595d79b65b 100644 --- a/content/docs/user-guide/experiment-management/visualizing-plots.md +++ b/content/docs/user-guide/experiment-management/visualizing-plots.md @@ -239,7 +239,8 @@ When you run [experiments] or otherwise update the data in the plots files, those updates will be automatically reflected in your visualizations. To [compare between experiments] or Git [revisions], you can use `dvc plots diff`, the [plots dashboard] from the [VS Code Extension][dvc extension], or [DVC -Studio] to share with others. +Studio] +to share with others. ![](/img/plots_compare_vs_code.png) @@ -276,8 +277,7 @@ view. -Once you have [shared] the results to [DVC Studio], you can -[compare experiments](/doc/studio/user-guide/experiments/visualize-and-compare) +Once you have [shared] the results to [DVC Studio], you can [compare experiments](/doc/studio/user-guide/experiments/visualize-and-compare) against the entire repo history: ![DVC Studio view](/img/dvclive-studio.png) diff --git a/content/docs/user-guide/index.md b/content/docs/user-guide/index.md index 1eb07dec4a..879c80360a 100644 --- a/content/docs/user-guide/index.md +++ b/content/docs/user-guide/index.md @@ -4,9 +4,9 @@ ## What is DVC? --> -**Data Version Control** is a [free], open-source tool for [data management], -[ML pipeline][ml pipelines] automation, and [experiment management]. This helps -data science and machine learning teams manage **large datasets**, make projects +**Data Version Control** is a [free], open-source tool for [data management], [ML +pipeline][ml pipelines] automation, and [experiment management]. This helps data +science and machine learning teams manage **large datasets**, make projects **reproducible**, and **collaborate** better. 
DVC takes advantage of the existing software engineering toolset your team @@ -32,9 +32,10 @@ this set of principles: ## Characteristics -- DVC comes as a [VS Code Extension], as a [command line] interface, and as a - [Python API]. These options provide a familiar an intuitive **user - experience** to a broad range of users. +- DVC comes as a [VS Code Extension], as a [command line] interface, and as a [Python + API]. + These options provide a familiar and intuitive **user experience** to a broad + range of users. - **Easy to use**: DVC is quick to [install](/doc/install) and works out of the box. It doesn't require special infrastructure, nor does it depend on APIs or external services. @@ -100,9 +101,8 @@ DVC is not fundamentally bound to Git, and can work without it (except **DVC does not replace Git!** DVC [metafiles] such as `dvc.yaml` and `.dvc` files serve as placeholders to version data and ML pipelines. These files change -along with your data, and you can use Git to place them under [version control] -as a proxy to the actual data, which is stored in a cache (outside -of Git). +along with your data, and you can use Git to place them under [version control] as +a proxy to the actual data, which is stored in a cache (outside of Git). DVC does, however, provide several commands similar to Git such as `dvc init`, `dvc add`, `dvc checkout`, or `dvc push`, which interact with the underlying Git @@ -125,8 +125,7 @@ repo (if one is being used, which is not required). [available]). - Git-LFS was not made with data science in mind, so it doesn't provide related - features (e.g. [ML pipelines], [metrics](/doc/command-reference/metrics), - etc.). + features (e.g. [ML pipelines], [metrics](/doc/command-reference/metrics), etc.). - GitHub (common Git hosting service) has a limit of 2 GB per repository.
@@ -183,8 +182,8 @@ hard links or symlinks, editing reflinks is always safe, as the original ### Workflow management systems -Systems to manage data pipelines and [dependency graphs] such as _Airflow_, -_Luigi_, etc. +Systems to manage data pipelines and [dependency graphs] such as _Airflow_, _Luigi_, +etc. - DVC is focused on data science and modeling. As a result, DVC pipelines are lightweight and easy to create and modify. However, DVC lacks advanced diff --git a/content/docs/user-guide/integrations/databricks.md b/content/docs/user-guide/integrations/databricks.md index 44166ede0e..dfbe79206b 100644 --- a/content/docs/user-guide/integrations/databricks.md +++ b/content/docs/user-guide/integrations/databricks.md @@ -12,8 +12,7 @@ directly. %pip install dvc ``` -In order to be able to work in [Databricks Repos], you'll need to use this -workaround: +In order to be able to work in [Databricks Repos], you'll need to use this workaround: ```bash !dvc config core.no_scm true --local @@ -21,8 +20,7 @@ workaround: ## DVC API -You can use your existing DVC projects through the [Python API] as normal, for -example: +You can use your existing DVC projects through the [Python API] as normal, for example: ```python import dvc.api @@ -58,9 +56,8 @@ with dvc.api.open( ## Running DVC commands Databricks doesn't provide a classic terminal by default, so you'll need to use -[magic commands] to run DVC commands in your notebook. If your workspace does -have [web terminal] enabled, you can also run DVC commands in the terminal as -normal. +[magic commands] to run DVC commands in your notebook. If your workspace does have +[web terminal] enabled, you can also run DVC commands in the terminal as normal. ### Example: set up shared DVC cache on dbfs @@ -74,9 +71,9 @@ normal. 
!dvc add data ``` -If working with [Databricks Repos], due to the limitations described in the -beginning and `noscm` workaround, DVC won't be able to automatically add new -entries to corresponding `.gitignore`s, so you'll need to do that manually. +If working with [Databricks Repos], due to the limitations described in the beginning +and `noscm` workaround, DVC won't be able to automatically add new entries to corresponding +`.gitignore`s, so you'll need to do that manually. ### Example: import data @@ -86,9 +83,8 @@ entries to corresponding `.gitignore`s, so you'll need to do that manually. ## Live experiment updates -If working with [Databricks Repos], you will need to set both the -`DVC_STUDIO_TOKEN` and `DVC_EXP_GIT_REMOTE` to see [live experiment updates] in -[DVC Studio]. +If working with [Databricks Repos], you will need to set both the `DVC_STUDIO_TOKEN` +and `DVC_EXP_GIT_REMOTE` to see [live experiment updates] in [DVC Studio]. ```python import getpass diff --git a/content/docs/user-guide/integrations/sagemaker.md b/content/docs/user-guide/integrations/sagemaker.md index 501375799c..673419e5e2 100644 --- a/content/docs/user-guide/integrations/sagemaker.md +++ b/content/docs/user-guide/integrations/sagemaker.md @@ -31,8 +31,7 @@ you would in any other environment. Take a look at DVC [experiments] for how to get started with DVC in notebooks (if you have setup [code-server] on SageMaker, you can also install the [DVC extension for VS Code]). -If you would like to see live experiment updates in [DVC Studio], set your -token: +If you would like to see live experiment updates in [DVC Studio], set your token: ```cli $ dvc studio login @@ -77,11 +76,10 @@ prepare: cache: false ``` -The [preprocessing script] takes `bucket` and `prefix` as arguments and -otherwise is copied directly from the original notebook code, which uses a -SageMaker Processing job. 
The DVC pipeline stage tracks the command, scripts, -input paths, and outputs paths, so that this stage will only be run again if any -of those change: +The [preprocessing script] takes `bucket` and `prefix` as arguments and otherwise +is copied directly from the original notebook code, which uses a SageMaker Processing +job. The DVC pipeline stage tracks the command, scripts, input paths, and outputs +paths, so that this stage will only be run again if any of those change: ```yaml preprocessing: @@ -100,13 +98,11 @@ preprocessing: ``` Finally, the [training script] uses the SageMaker Estimator for XGBoost to train -a model. We add all the model hyperparameters as arguments to make it easy to -tune hyperparameters and track what changed. Hyperparameters are added under the -`train` key in `params.yaml`. The DVC pipeline stage `cmd` includes `${train}` -to -[unpack and pass](https://dvc.org/doc/user-guide/project-structure/dvcyaml-files#dictionary-unpacking) -all those arguments and track them as parameters, in addition to tracking the -other inputs and outputs: +a model. We add all the model hyperparameters as arguments to make it easy to tune +hyperparameters and track what changed. Hyperparameters are added under the `train` +key in `params.yaml`. The DVC pipeline stage `cmd` includes `${train}` to [unpack and pass](https://dvc.org/doc/user-guide/project-structure/dvcyaml-files#dictionary-unpacking) +all those arguments and track them as parameters, in addition to tracking the other +inputs and outputs: ```yaml training: diff --git a/content/docs/user-guide/pipelines/defining-pipelines.md b/content/docs/user-guide/pipelines/defining-pipelines.md index f8f47313fb..e0ffc890f3 100644 --- a/content/docs/user-guide/pipelines/defining-pipelines.md +++ b/content/docs/user-guide/pipelines/defining-pipelines.md @@ -4,8 +4,8 @@ Pipelines represent data workflows that you want to **reproduce** reliably -- so the results are consistent. 
The typical pipelining process involves: - Obtain and `dvc add` or `dvc import` the project's initial data requirements - (see [Data Versioning]). This caches the data and generates - `.dvc` files. + (see [Data Versioning]). This caches the data and generates `.dvc` + files. - Define the pipeline [stages](#stages) in `dvc.yaml` files (more on this later). Example structure: @@ -169,14 +169,13 @@ the previous section's example). A more granular type of dependency is the parameter (`params` field of `dvc.yaml`), or _hyperparameters_ in machine learning. These are any values used inside your code to tune data processing, or that affect stage execution in any -other way. For example, training a [Neural Network] usually requires _batch -size_ and _epoch_ values. +other way. For example, training a [Neural Network] usually requires _batch size_ +and _epoch_ values. Instead of hard-coding param values, your code can read them from a structured file (e.g. YAML format). DVC can track any key/value pair in a supported -[parameters file] (`params.yaml` by default). Params are granular dependencies -because DVC only invalidates stages when the corresponding part of the params -file has changed. +[parameters file] (`params.yaml` by default). Params are granular dependencies because +DVC only invalidates stages when the corresponding part of the params file has changed. 
```yaml stages: diff --git a/content/docs/user-guide/pipelines/running-pipelines.md b/content/docs/user-guide/pipelines/running-pipelines.md index 5e14ce005d..6351e7fa1c 100644 --- a/content/docs/user-guide/pipelines/running-pipelines.md +++ b/content/docs/user-guide/pipelines/running-pipelines.md @@ -346,9 +346,8 @@ true ## Debugging Stages If you are using advanced features to interpolate values for your pipeline, like -[templating] or [Hydra composition], you can get the interpolated values by -running `dvc repro -vv` or `dvc exp run -vv`, which will include information -like: +[templating] or [Hydra composition], you can get the interpolated values by running +`dvc repro -vv` or `dvc exp run -vv`, which will include information like: ```cli 2023-05-18 07:38:43,955 TRACE: Hydra composition enabled. diff --git a/content/docs/user-guide/project-structure/configuration.md b/content/docs/user-guide/project-structure/configuration.md index 80f6504a83..1d5f615824 100644 --- a/content/docs/user-guide/project-structure/configuration.md +++ b/content/docs/user-guide/project-structure/configuration.md @@ -1,8 +1,8 @@ # DVC Configuration Once initialized in a project, DVC populates its installation -directory with [internal files], which include `.dvc/config`, the default -configuration file. +directory with [internal files], which include `.dvc/config`, the default configuration +file. Config files can be composed manually (or programmatically), or managed with the helper command `dvc config`. 
@@ -56,13 +56,11 @@ config file (`.dvc/config` by default), supporting different config options within: - [`core`](#core) - main section with the general config options -- [`remote`](#remote) - sections in the config file that describe [remote - storage] +- [`remote`](#remote) - sections in the config file that describe [remote storage] - [`cache`](#cache) - options that affect the project's cache - [`db`](#db) - sections in the config file that describe [database connections] - [`exp`](#exp) - options around [experiments] configuration. -- [`hydra`](#hydra) - options around [Hydra Composition] for experiment - configuration. +- [`hydra`](#hydra) - options around [Hydra Composition] for experiment configuration. - [`parsing`](#parsing) - options around the parsing of [dictionary unpacking]. - [`plots`](#plots) - options for configuring `dvc plots`. - [`state`](#state) - see [Internal directories and files][internals] to learn @@ -254,10 +252,11 @@ to, in commands like `import-db`. Sets the defaults for experiment configuration. -- `exp.auto_push` - [push experiment] automatically after `dvc exp run` and - `dvc exp save`. Accepts values `true` and `false` (default). +- `exp.auto_push` - [push experiment] automatically after `dvc exp run` and `dvc exp save`. + Accepts values `true` and `false` (default). - `exp.git_remote` - Git remote name or URL used to [push experiment] and [send - live metrics and plots] to [DVC Studio]. Defaults to `origin`. + live + metrics and plots] to [DVC Studio]. Defaults to `origin`. [push experiment]: /doc/user-guide/experiment-management/sharing-experiments [send live metrics and plots]: @@ -270,8 +269,7 @@ Sets the defaults for experiment configuration. ## hydra -Sets the defaults for experiment configuration via [Hydra -Composition]. +Sets the defaults for experiment configuration via [Hydra Composition]. - `hydra.enabled` - enables Hydra [config composition]. 
- `hydra.config_dir` - location of the directory containing Hydra [config @@ -425,11 +423,10 @@ have no effect. or check [this guide on how to create an access token](/doc/studio/user-guide/experiments/live-metrics-and-plots#set-up-an-access-token). -- `studio.offline` - Disables sharing [live experiments] even if `studio.token` - is set or the token has been specified in `DVC_STUDIO_TOKEN`. Offline mode can - also be specified through `DVC_STUDIO_OFFLINE` environment variable, which - will override any value in `studio.offline`. Accepts values `true` and - `false`. +- `studio.offline` - Disables sharing [live experiments] even if `studio.token` is + set or the token has been specified in `DVC_STUDIO_TOKEN`. Offline mode can also + be specified through `DVC_STUDIO_OFFLINE` environment variable, which will override + any value in `studio.offline`. Accepts values `true` and `false`. - `studio.url` - URL of Studio to use (in case of self-hosted DVC Studio instance). This can also be specified through `DVC_STUDIO_URL` environment diff --git a/content/docs/user-guide/project-structure/dvcyaml-files.md b/content/docs/user-guide/project-structure/dvcyaml-files.md index 84ae31e3c7..3a998d35da 100644 --- a/content/docs/user-guide/project-structure/dvcyaml-files.md +++ b/content/docs/user-guide/project-structure/dvcyaml-files.md @@ -94,8 +94,8 @@ If the ID is an arbitrary string, a file path must be provided in the `y` field -Refer to [Visualizing Plots] and `dvc plots show` for more examples, and refer -to [DVCLive] for a helper to log plots. +Refer to [Visualizing Plots] and `dvc plots show` for more examples, and refer to +[DVCLive] for a helper to log plots. [visualizing plots]: /doc/user-guide/experiment-management/visualizing-plots @@ -330,10 +330,9 @@ the same params file, but only certain values will affect their state (see #### Parameters files -The supported params file formats are YAML 1.2, JSON, TOML 1.0, [and Python]. 
-[Parameter](#parameters) key/value pairs should be organized in tree-like -hierarchies inside. Supported value types are: string, integer, float, boolean, -and arrays (groups of params). +The supported params file formats are YAML 1.2, JSON, TOML 1.0, [and Python]. [Parameter](#parameters) +key/value pairs should be organized in tree-like hierarchies inside. Supported value +types are: string, integer, float, boolean, and arrays (groups of params). These files are typically written manually (or generated) and they can be versioned directly with Git along with other workspace files. @@ -711,8 +710,9 @@ $ R train.r --foo 'foo' --bar 1 --bool \ -You can combine this with argument parsing libraries such as [R argparse] or -[Julia ArgParse] to do all the work for you. +You can combine this with argument parsing libraries such as [R argparse] or [Julia +ArgParse] +to do all the work for you. [r argparse]: https://cran.r-project.org/web/packages/argparse/vignettes/argparse.html diff --git a/content/docs/user-guide/project-structure/internal-files.md b/content/docs/user-guide/project-structure/internal-files.md index df5a98967f..f0eb3e26f8 100644 --- a/content/docs/user-guide/project-structure/internal-files.md +++ b/content/docs/user-guide/project-structure/internal-files.md @@ -10,8 +10,8 @@ Not to be confused with `.dvc` files. -- `.dvc/config`: This is the default [DVC configuration] file. It can be edited - by hand or with the `dvc config` command. +- `.dvc/config`: This is the default [DVC configuration] file. It can be edited by + hand or with the `dvc config` command. - `.dvc/config.local`: This is an optional Git-ignored configuration file, that will overwrite options in `.dvc/config`. This is useful when you need to @@ -72,8 +72,8 @@ Not to be confused with `.dvc` files. ## Structure of the cache directory -The DVC cache is a [content-addressable storage] (by default in `.dvc/cache`), -which adds a layer of indirection between code and data. 
+The DVC cache is a [content-addressable storage] (by default in `.dvc/cache`), which +adds a layer of indirection between code and data. There are two ways in which the data is cached, depending on whether it's a single file, or a directory (which may contain multiple files). diff --git a/content/docs/user-guide/troubleshooting.md b/content/docs/user-guide/troubleshooting.md index 4555c2939c..2e45bb0b8a 100644 --- a/content/docs/user-guide/troubleshooting.md +++ b/content/docs/user-guide/troubleshooting.md @@ -12,8 +12,9 @@ custom anchor link is used. Just add {#custom-anchor} after each title: Users may encounter errors when running `dvc pull` and `dvc fetch`, like `WARNING: Cache 'xxxx' not found.` or `ERROR: failed to pull data from the cloud`. The most common cause is changes -pushed to Git without the corresponding data being uploaded to the [DVC remote]. -Make sure to `dvc push` from the original project, and try again. +pushed to Git without the corresponding data being uploaded to the [DVC +remote]. Make sure to `dvc push` from the original project, and try +again. [dvc remote]: /doc/user-guide/data-management/remote-storage @@ -131,8 +132,8 @@ ssh-add --apple-load-keychain ~/.ssh/ed255 You may encounter this error when using DVC on different Python versions with the same DVC project directory, for example having created the project on Python 3.8. in one environment and later attempting to update it from -a Python 3.7 env. This is due to temporary [internal directories] that can be -incompatible with older Python versions once created. +a Python 3.7 env. This is due to temporary [internal directories] that can be incompatible +with older Python versions once created. In these rare situations, it is safe to remove the corresponding tmp directory and retry the DVC command. 
Specifically, one of: diff --git a/gatsby-config.js b/gatsby-config.js index c4487caa27..990236c599 100644 --- a/gatsby-config.js +++ b/gatsby-config.js @@ -138,5 +138,6 @@ module.exports = { developMiddleware: app => { app.use(redirectsMiddleware) app.use('/api', apiMiddleware) - } + }, + jsxRuntime: 'automatic' } diff --git a/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx b/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx index 072ad9ef24..5e178177b2 100644 --- a/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx +++ b/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import LayoutWidthContainer from '@dvcorg/gatsby-theme-iterative/src/components/LayoutWidthContainer' diff --git a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/LinkItems/index.tsx b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/LinkItems/index.tsx index f4744e9ae6..321b578a83 100644 --- a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/LinkItems/index.tsx +++ b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/LinkItems/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import Link from '@dvcorg/gatsby-theme-iterative/src/components/Link' diff --git a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/SocialIcons/index.tsx b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/SocialIcons/index.tsx index 6d75bf5d57..b52398968f 100644 --- a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/SocialIcons/index.tsx +++ b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/SocialIcons/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import SocialIcon, { ISocialIconProps } from '@dvcorg/gatsby-theme-iterative/src/components/SocialIcon' diff --git 
a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/index.tsx b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/index.tsx index d087c6ae1a..34b7aa3b05 100644 --- a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/index.tsx +++ b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import SocialIcons from './SocialIcons' import LinkItems from './LinkItems' diff --git a/src/components/Community/Block/index.tsx b/src/components/Community/Block/index.tsx index 2e5d965ad9..c9a9deb175 100644 --- a/src/components/Community/Block/index.tsx +++ b/src/components/Community/Block/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import * as styles from './styles.module.css' interface ICommunityBlockProps { diff --git a/src/components/Community/Contribute/index.tsx b/src/components/Community/Contribute/index.tsx index b84251ca8a..ab5853f33c 100644 --- a/src/components/Community/Contribute/index.tsx +++ b/src/components/Community/Contribute/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import { ICommunitySectionTheme } from '../' import LayoutWidthContainer from '@dvcorg/gatsby-theme-iterative/src/components/LayoutWidthContainer' import Link from '@dvcorg/gatsby-theme-iterative/src/components/Link' diff --git a/src/components/Community/Hero/index.tsx b/src/components/Community/Hero/index.tsx index 9eed6724bf..f9ca2437f6 100644 --- a/src/components/Community/Hero/index.tsx +++ b/src/components/Community/Hero/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import LayoutWidthContainer from '@dvcorg/gatsby-theme-iterative/src/components/LayoutWidthContainer' import ShowOnly from '@dvcorg/gatsby-theme-iterative/src/components/ShowOnly' import Link from '@dvcorg/gatsby-theme-iterative/src/components/Link' diff --git a/src/components/Community/Testimonial/index.tsx b/src/components/Community/Testimonial/index.tsx index f5676f5944..860418ad45 100644 --- 
a/src/components/Community/Testimonial/index.tsx +++ b/src/components/Community/Testimonial/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import LayoutWidthContainer from '@dvcorg/gatsby-theme-iterative/src/components/LayoutWidthContainer' import Section from '../Section' import IframeResizer from 'iframe-resizer-react' diff --git a/src/components/Community/index.tsx b/src/components/Community/index.tsx index f88c14d5b0..7570d6c2b8 100644 --- a/src/components/Community/index.tsx +++ b/src/components/Community/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import PageContent from '../PageContent' import SubscribeSection from '../SubscribeSection' diff --git a/src/components/HeroContainer/index.tsx b/src/components/HeroContainer/index.tsx index 81f8acc052..64df4d8a53 100644 --- a/src/components/HeroContainer/index.tsx +++ b/src/components/HeroContainer/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import LayoutWidthContainer from '@dvcorg/gatsby-theme-iterative/src/components/LayoutWidthContainer' diff --git a/src/components/Home/Alert/index.tsx b/src/components/Home/Alert/index.tsx index c9148e35a6..012e4996c9 100644 --- a/src/components/Home/Alert/index.tsx +++ b/src/components/Home/Alert/index.tsx @@ -1,5 +1,4 @@ import Link from '@dvcorg/gatsby-theme-iterative/src/components/Link' -import React from 'react' import { cn } from '../../../utils' const banner = { diff --git a/src/components/Home/Hero/BetterTogether/index.tsx b/src/components/Home/Hero/BetterTogether/index.tsx index db4e121e39..8fa25d069c 100644 --- a/src/components/Home/Hero/BetterTogether/index.tsx +++ b/src/components/Home/Hero/BetterTogether/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import HeroContainer from '../../../HeroContainer' import { cn } from '../../../../utils' diff --git a/src/components/Home/Hero/GetStarted/GetStartedWithDatachain.tsx b/src/components/Home/Hero/GetStarted/GetStartedWithDatachain.tsx index 
4c0f252c98..7c065c126f 100644 --- a/src/components/Home/Hero/GetStarted/GetStartedWithDatachain.tsx +++ b/src/components/Home/Hero/GetStarted/GetStartedWithDatachain.tsx @@ -1,4 +1,3 @@ -import React from 'react' import HeroContainer from '../../../HeroContainer' import { cn } from '../../../../utils' import DatachainSlides from '../../LandingHero/DatachainSlides' diff --git a/src/components/Home/Hero/GetStarted/GetStartedWithDvc.tsx b/src/components/Home/Hero/GetStarted/GetStartedWithDvc.tsx index 0b1685718a..a134fafd7a 100644 --- a/src/components/Home/Hero/GetStarted/GetStartedWithDvc.tsx +++ b/src/components/Home/Hero/GetStarted/GetStartedWithDvc.tsx @@ -1,4 +1,3 @@ -import React from 'react' import HeroContainer from '../../../HeroContainer' import { cn } from '../../../../utils' diff --git a/src/components/Home/Hero/HeroTitleSection.tsx b/src/components/Home/Hero/HeroTitleSection.tsx index b800961c72..5c6704dd0f 100644 --- a/src/components/Home/Hero/HeroTitleSection.tsx +++ b/src/components/Home/Hero/HeroTitleSection.tsx @@ -1,5 +1,4 @@ import cn from 'classnames' -import React from 'react' import * as styles from './styles.module.css' diff --git a/src/components/Home/Hero/index.tsx b/src/components/Home/Hero/index.tsx index 9ab36fa1fe..815ee57f15 100644 --- a/src/components/Home/Hero/index.tsx +++ b/src/components/Home/Hero/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import HeroTitleSection from './HeroTitleSection' import HeroSection from './HeroSection' import GetStartedWithDatachain from './GetStarted/GetStartedWithDatachain' diff --git a/src/components/Home/LandingHero/DatachainSlides.tsx b/src/components/Home/LandingHero/DatachainSlides.tsx index 36c7729a7d..a98fa13fdd 100644 --- a/src/components/Home/LandingHero/DatachainSlides.tsx +++ b/src/components/Home/LandingHero/DatachainSlides.tsx @@ -1,4 +1,3 @@ -import React from 'react' import Slides, { ISlide } from './Slides' import { graphql, useStaticQuery } from 'gatsby' diff --git 
a/src/components/Home/LandingHero/DvcSlides.tsx b/src/components/Home/LandingHero/DvcSlides.tsx index fcd35a822e..29426db580 100644 --- a/src/components/Home/LandingHero/DvcSlides.tsx +++ b/src/components/Home/LandingHero/DvcSlides.tsx @@ -1,4 +1,3 @@ -import React from 'react' import Slides, { ISlide } from './Slides' import { graphql, useStaticQuery } from 'gatsby' diff --git a/src/components/Home/LandingHero/GithubLine/index.tsx b/src/components/Home/LandingHero/GithubLine/index.tsx index 21b8145722..61428bdc85 100644 --- a/src/components/Home/LandingHero/GithubLine/index.tsx +++ b/src/components/Home/LandingHero/GithubLine/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import Link from '@dvcorg/gatsby-theme-iterative/src/components/Link' import useStars from '../../../../gatsby/hooks/stars' diff --git a/src/components/Home/LandingHero/Slides/index.tsx b/src/components/Home/LandingHero/Slides/index.tsx index 449f433819..5355a2953d 100644 --- a/src/components/Home/LandingHero/Slides/index.tsx +++ b/src/components/Home/LandingHero/Slides/index.tsx @@ -1,4 +1,4 @@ -import React, { Reducer, useCallback, useMemo, useReducer } from 'react' +import { Reducer, useCallback, useMemo, useReducer } from 'react' import cn from 'classnames' import { MemoizedTypedTerminal } from '../Typed' diff --git a/src/components/Home/LandingHero/index.tsx b/src/components/Home/LandingHero/index.tsx index d68384a085..b8d7480cf3 100644 --- a/src/components/Home/LandingHero/index.tsx +++ b/src/components/Home/LandingHero/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import TwoRowsButtonLink from '../../TwoRowsButton/link' diff --git a/src/components/Home/LearnMore/index.tsx b/src/components/Home/LearnMore/index.tsx index b6e64a4296..06cee5cabc 100644 --- a/src/components/Home/LearnMore/index.tsx +++ b/src/components/Home/LearnMore/index.tsx @@ -1,7 +1,6 @@ import * as styles from './styles.module.css' import { logEvent } from 
'@dvcorg/gatsby-theme-iterative/src/utils/front/plausible' import cn from 'classnames' -import React from 'react' const logLearnMoreEvent = () => { logEvent('Hero', { Item: 'learn-more' }) diff --git a/src/components/Home/LogosSlider/CompanyLogos.tsx b/src/components/Home/LogosSlider/CompanyLogos.tsx index da215d734a..77dc384202 100644 --- a/src/components/Home/LogosSlider/CompanyLogos.tsx +++ b/src/components/Home/LogosSlider/CompanyLogos.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' const logoClass = diff --git a/src/components/Home/LogosSlider/index.tsx b/src/components/Home/LogosSlider/index.tsx index 219f993fa8..12b0eb7f27 100644 --- a/src/components/Home/LogosSlider/index.tsx +++ b/src/components/Home/LogosSlider/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import * as styles from './styles.module.css' diff --git a/src/components/Home/index.tsx b/src/components/Home/index.tsx index c1f4d2afe5..b91181431d 100644 --- a/src/components/Home/index.tsx +++ b/src/components/Home/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import WhatsNewModal from './WhatsNewModal' import SubscribeSection from '../SubscribeSection' diff --git a/src/components/MainLayout/index.tsx b/src/components/MainLayout/index.tsx index ba32acf172..ddd2c153d3 100644 --- a/src/components/MainLayout/index.tsx +++ b/src/components/MainLayout/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import { PageProps } from 'gatsby' import ThemeMainLayout, { LayoutModifiers, diff --git a/src/components/NotFound/index.tsx b/src/components/NotFound/index.tsx index 2c8c06c0c6..df0fbc28e6 100644 --- a/src/components/NotFound/index.tsx +++ b/src/components/NotFound/index.tsx @@ -1,5 +1,3 @@ -import React from 'react' - import * as styles from './styles.module.css' const NotFound: React.FC = () => ( diff --git a/src/components/PageContent/index.tsx b/src/components/PageContent/index.tsx index 78d66fc48f..45e56cf25f 100644 --- 
a/src/components/PageContent/index.tsx +++ b/src/components/PageContent/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import * as styles from './styles.module.css' diff --git a/src/components/Spinner/index.tsx b/src/components/Spinner/index.tsx index 3db28690ff..7aec88908e 100644 --- a/src/components/Spinner/index.tsx +++ b/src/components/Spinner/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import { cn } from '../../utils' const Spinner = ({ className }: { className?: string }) => { diff --git a/src/components/SubscribeSection/index.tsx b/src/components/SubscribeSection/index.tsx index 90c06600d0..ca368191f7 100644 --- a/src/components/SubscribeSection/index.tsx +++ b/src/components/SubscribeSection/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import SubscribeForm from './Form' diff --git a/src/components/Support/RequestAQuoteDialog.tsx b/src/components/Support/RequestAQuoteDialog.tsx index 55acc3cb6d..0f82ebbcd0 100644 --- a/src/components/Support/RequestAQuoteDialog.tsx +++ b/src/components/Support/RequestAQuoteDialog.tsx @@ -1,4 +1,4 @@ -import React from 'react' +import { Dispatch, useState } from 'react' import { Dialog, DialogContent, @@ -15,9 +15,9 @@ const RequestAQuoteDialog = ({ setOpenDialog }: { openDialog: boolean - setOpenDialog: React.Dispatch> + setOpenDialog: Dispatch> }) => { - const [pauseDialog, setPauseDialog] = React.useState(false) + const [pauseDialog, setPauseDialog] = useState(false) const title = 'Request a Quote' const description = "Fill out the form below and we'll reach out to find a time that works for you!" 
diff --git a/src/components/ThankYou/index.tsx b/src/components/ThankYou/index.tsx index 60edfc6165..2541246f8a 100644 --- a/src/components/ThankYou/index.tsx +++ b/src/components/ThankYou/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import * as styles from './styles.module.css' const ThankYouPage: React.FC = () => ( diff --git a/src/components/TwoRowsButton/index.tsx b/src/components/TwoRowsButton/index.tsx index 82a15603c0..0c5bbbbeb3 100644 --- a/src/components/TwoRowsButton/index.tsx +++ b/src/components/TwoRowsButton/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import * as styles from './styles.module.css' diff --git a/src/components/TwoRowsButton/link.tsx b/src/components/TwoRowsButton/link.tsx index c0e74d59d1..0a511ee7f9 100644 --- a/src/components/TwoRowsButton/link.tsx +++ b/src/components/TwoRowsButton/link.tsx @@ -1,4 +1,3 @@ -import React from 'react' import cn from 'classnames' import * as styles from './styles.module.css' diff --git a/src/components/Typeform/index.tsx b/src/components/Typeform/index.tsx index 74ee51f300..a8444e9563 100644 --- a/src/components/Typeform/index.tsx +++ b/src/components/Typeform/index.tsx @@ -1,4 +1,3 @@ -import React from 'react' import * as styles from './styles.module.css' const Typeform: React.FC<{ diff --git a/src/components/base/button.tsx b/src/components/base/button.tsx index 9edd664f35..dcc44f9495 100644 --- a/src/components/base/button.tsx +++ b/src/components/base/button.tsx @@ -1,10 +1,9 @@ -import React from 'react' +import { ButtonHTMLAttributes, forwardRef } from 'react' import { cn } from '../../utils' -export interface IButtonProps - extends React.ButtonHTMLAttributes {} +export interface IButtonProps extends ButtonHTMLAttributes {} -const Button = React.forwardRef( +const Button = forwardRef( ({ className, ...props }, ref) => { return (