mirror of
https://github.com/facebookresearch/ReAgent.git
synced 2026-05-17 12:40:39 +00:00
573 lines
109 KiB
HTML
573 lines
109 KiB
HTML
<!DOCTYPE html>
|
|
<html class="writer-html5" lang="en" >
|
|
<head>
|
|
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<title>reagent.model_managers.model_based package — ReAgent 1.0 documentation</title>
|
|
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
|
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
|
<!--[if lt IE 9]>
|
|
<script src="../_static/js/html5shiv.min.js"></script>
|
|
<![endif]-->
|
|
|
|
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
|
<script src="../_static/jquery.js"></script>
|
|
<script src="../_static/underscore.js"></script>
|
|
<script src="../_static/doctools.js"></script>
|
|
<script src="../_static/js/theme.js"></script>
|
|
<link rel="index" title="Index" href="../genindex.html" />
|
|
<link rel="search" title="Search" href="../search.html" />
|
|
<link rel="next" title="reagent.model_managers.parametric package" href="reagent.model_managers.parametric.html" />
|
|
<link rel="prev" title="reagent.model_managers.discrete package" href="reagent.model_managers.discrete.html" />
|
|
</head>
|
|
|
|
<body class="wy-body-for-nav">
|
|
<div class="wy-grid-for-nav">
|
|
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
<div class="wy-side-scroll">
|
|
<div class="wy-side-nav-search" >
|
|
<a href="../index.html" class="icon icon-home"> ReAgent
|
|
</a>
|
|
<div role="search">
|
|
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
|
<input type="text" name="q" placeholder="Search docs" />
|
|
<input type="hidden" name="check_keywords" value="yes" />
|
|
<input type="hidden" name="area" value="default" />
|
|
</form>
|
|
</div>
|
|
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
<p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../installation.html">Installation</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../usage.html">Usage</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../rasp_tutorial.html">RASP (Not Actively Maintained)</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Advanced Topics</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../continuous_integration.html">Continuous Integration</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Package Reference</span></p>
|
|
<ul class="current">
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.core.html">Core</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.data.html">Data</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.gym.html">Gym</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.evaluation.html">Evaluation</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.lite.html">Lite</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.mab.html">MAB</a></li>
|
|
<li class="toctree-l1 current"><a class="reference internal" href="reagent.model_managers.html">Model Managers</a><ul class="current">
|
|
<li class="toctree-l2 current"><a class="reference internal" href="reagent.model_managers.html#subpackages">Subpackages</a><ul class="current">
|
|
<li class="toctree-l3"><a class="reference internal" href="reagent.model_managers.actor_critic.html">reagent.model_managers.actor_critic package</a></li>
|
|
<li class="toctree-l3"><a class="reference internal" href="reagent.model_managers.discrete.html">reagent.model_managers.discrete package</a></li>
|
|
<li class="toctree-l3 current"><a class="current reference internal" href="#">reagent.model_managers.model_based package</a><ul>
|
|
<li class="toctree-l4"><a class="reference internal" href="#submodules">Submodules</a></li>
|
|
<li class="toctree-l4"><a class="reference internal" href="#module-reagent.model_managers.model_based.cross_entropy_method">reagent.model_managers.model_based.cross_entropy_method module</a></li>
|
|
<li class="toctree-l4"><a class="reference internal" href="#module-reagent.model_managers.model_based.seq2reward_model">reagent.model_managers.model_based.seq2reward_model module</a></li>
|
|
<li class="toctree-l4"><a class="reference internal" href="#module-reagent.model_managers.model_based.synthetic_reward">reagent.model_managers.model_based.synthetic_reward module</a></li>
|
|
<li class="toctree-l4"><a class="reference internal" href="#module-reagent.model_managers.model_based.world_model">reagent.model_managers.model_based.world_model module</a></li>
|
|
<li class="toctree-l4"><a class="reference internal" href="#module-reagent.model_managers.model_based">Module contents</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l3"><a class="reference internal" href="reagent.model_managers.parametric.html">reagent.model_managers.parametric package</a></li>
|
|
<li class="toctree-l3"><a class="reference internal" href="reagent.model_managers.policy_gradient.html">reagent.model_managers.policy_gradient package</a></li>
|
|
<li class="toctree-l3"><a class="reference internal" href="reagent.model_managers.ranking.html">reagent.model_managers.ranking package</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#submodules">Submodules</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers.actor_critic_base">reagent.model_managers.actor_critic_base module</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers.discrete_dqn_base">reagent.model_managers.discrete_dqn_base module</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers.model_manager">reagent.model_managers.model_manager module</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers.parametric_dqn_base">reagent.model_managers.parametric_dqn_base module</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers.slate_q_base">reagent.model_managers.slate_q_base module</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers.union">reagent.model_managers.union module</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers.world_model_base">reagent.model_managers.world_model_base module</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="reagent.model_managers.html#module-reagent.model_managers">Module contents</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.model_utils.html">Model Utils</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.net_builder.html">Net Builders</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.optimizer.html">Optimizers</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.models.html">Models</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.prediction.html">Prediction</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.preprocessing.html">Preprocessing</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.training.html">Training</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="reagent.workflow.html">Workflow</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="modules.html">All Modules</a></li>
|
|
</ul>
|
|
<p class="caption" role="heading"><span class="caption-text">Others</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference external" href="https://github.com/facebookresearch/ReAgent">Github</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../license.html">License</a></li>
|
|
</ul>
|
|
|
|
</div>
|
|
</div>
|
|
</nav>
|
|
|
|
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
<a href="../index.html">ReAgent</a>
|
|
</nav>
|
|
|
|
<div class="wy-nav-content">
|
|
<div class="rst-content">
|
|
<div role="navigation" aria-label="Page navigation">
|
|
<ul class="wy-breadcrumbs">
|
|
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
|
<li><a href="reagent.model_managers.html">reagent.model_managers package</a> »</li>
|
|
<li>reagent.model_managers.model_based package</li>
|
|
<li class="wy-breadcrumbs-aside">
|
|
<a href="../_sources/api/reagent.model_managers.model_based.rst.txt" rel="nofollow"> View page source</a>
|
|
</li>
|
|
</ul>
|
|
<hr/>
|
|
</div>
|
|
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
<div itemprop="articleBody">
|
|
|
|
<section id="reagent-model-managers-model-based-package">
|
|
<h1>reagent.model_managers.model_based package<a class="headerlink" href="#reagent-model-managers-model-based-package" title="Permalink to this headline"></a></h1>
|
|
<section id="submodules">
|
|
<h2>Submodules<a class="headerlink" href="#submodules" title="Permalink to this headline"></a></h2>
|
|
</section>
|
|
<section id="module-reagent.model_managers.model_based.cross_entropy_method">
|
|
<span id="reagent-model-managers-model-based-cross-entropy-method-module"></span><h2>reagent.model_managers.model_based.cross_entropy_method module<a class="headerlink" href="#module-reagent.model_managers.model_based.cross_entropy_method" title="Permalink to this headline"></a></h2>
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.cross_entropy_method.CEMPolicy">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.cross_entropy_method.</span></span><span class="sig-name descname"><span class="pre">CEMPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">cem_planner_network</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.models.html#reagent.models.cem_planner.CEMPlannerNetwork" title="reagent.models.cem_planner.CEMPlannerNetwork"><span class="pre">reagent.models.cem_planner.CEMPlannerNetwork</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">discrete_action</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.cross_entropy_method.CEMPolicy" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.gym.policies.html#reagent.gym.policies.policy.Policy" title="reagent.gym.policies.policy.Policy"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.gym.policies.policy.Policy</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.cross_entropy_method.CEMPolicy.act">
|
|
<span class="sig-name descname"><span class="pre">act</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.core.html#reagent.core.types.FeatureData" title="reagent.core.types.FeatureData"><span class="pre">reagent.core.types.FeatureData</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">possible_actions_mask</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">torch.Tensor</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.core.html#reagent.core.types.ActorOutput" title="reagent.core.types.ActorOutput"><span class="pre">reagent.core.types.ActorOutput</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.cross_entropy_method.CEMPolicy.act" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Performs the composition described above.
|
|
These are the actions being put into the replay buffer, not necessarily
|
|
the actions taken by the environment!</p>
|
|
</dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.cross_entropy_method.</span></span><span class="sig-name descname"><span class="pre">CrossEntropyMethod</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reward_boost:</span> <span class="pre">Optional[Dict[str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">float]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.core.parameters.CEMTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.world_model_base.WorldModelBase" title="reagent.model_managers.world_model_base.WorldModelBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.world_model_base.WorldModelBase</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.html#reagent.training.cem_trainer.CEMTrainer" title="reagent.training.cem_trainer.CEMTrainer"><span 
class="pre">reagent.training.cem_trainer.CEMTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod.create_policy">
|
|
<span class="sig-name descname"><span class="pre">create_policy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">trainer_module</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.training.html#reagent.training.reagent_lightning_module.ReAgentLightningModule" title="reagent.training.reagent_lightning_module.ReAgentLightningModule"><span class="pre">reagent.training.reagent_lightning_module.ReAgentLightningModule</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">serving</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span 
class="sig-return-typehint"><a class="reference internal" href="reagent.gym.policies.html#reagent.gym.policies.policy.Policy" title="reagent.gym.policies.policy.Policy"><span class="pre">reagent.gym.policies.policy.Policy</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod.create_policy" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.CEMTrainerParameters" title="reagent.core.parameters.CEMTrainerParameters"><span class="pre">reagent.core.parameters.CEMTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.cross_entropy_method.CrossEntropyMethod.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
</section>
|
|
<section id="module-reagent.model_managers.model_based.seq2reward_model">
|
|
<span id="reagent-model-managers-model-based-seq2reward-model-module"></span><h2>reagent.model_managers.model_based.seq2reward_model module<a class="headerlink" href="#module-reagent.model_managers.model_based.seq2reward_model" title="Permalink to this headline"></a></h2>
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.seq2reward_model.</span></span><span class="sig-name descname"><span class="pre">Seq2RewardModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reward_boost:</span> <span class="pre">Optional[Dict[str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">float]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">net_builder:</span> <span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compress_net_builder:</span> <span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.core.parameters.Seq2RewardTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">preprocessing_options:</span> <span class="pre">Optional[reagent.workflow.types.PreprocessingOptions]</span> <span class="pre">=</span> <span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.world_model_base.WorldModelBase" title="reagent.model_managers.world_model_base.WorldModelBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.world_model_base.WorldModelBase</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.world_model.html#reagent.training.world_model.seq2reward_trainer.Seq2RewardTrainer" title="reagent.training.world_model.seq2reward_trainer.Seq2RewardTrainer"><span 
class="pre">reagent.training.world_model.seq2reward_trainer.Seq2RewardTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.compress_net_builder">
|
|
<span class="sig-name descname"><span class="pre">compress_net_builder</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.net_builder.html#reagent.net_builder.unions.ValueNetBuilder__Union" title="reagent.net_builder.unions.ValueNetBuilder__Union"><span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.compress_net_builder" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.get_reporter">
|
|
<span class="sig-name descname"><span class="pre">get_reporter</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.reporting.html#reagent.reporting.seq2reward_reporter.Seq2RewardReporter" title="reagent.reporting.seq2reward_reporter.Seq2RewardReporter"><span class="pre">reagent.reporting.seq2reward_reporter.Seq2RewardReporter</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.get_reporter" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.net_builder">
|
|
<span class="sig-name descname"><span class="pre">net_builder</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.net_builder.html#reagent.net_builder.unions.ValueNetBuilder__Union" title="reagent.net_builder.unions.ValueNetBuilder__Union"><span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.net_builder" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.preprocessing_options">
|
|
<span class="sig-name descname"><span class="pre">preprocessing_options</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.PreprocessingOptions" title="reagent.workflow.types.PreprocessingOptions"><span class="pre">reagent.workflow.types.PreprocessingOptions</span></a><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.preprocessing_options" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.Seq2RewardTrainerParameters" title="reagent.core.parameters.Seq2RewardTrainerParameters"><span class="pre">reagent.core.parameters.Seq2RewardTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.seq2reward_model.Seq2RewardModel.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
</section>
|
|
<section id="module-reagent.model_managers.model_based.synthetic_reward">
|
|
<span id="reagent-model-managers-model-based-synthetic-reward-module"></span><h2>reagent.model_managers.model_based.synthetic_reward module<a class="headerlink" href="#module-reagent.model_managers.model_based.synthetic_reward" title="Permalink to this headline"></a></h2>
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.synthetic_reward.</span></span><span class="sig-name descname"><span class="pre">SyntheticReward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.training.parameters.RewardNetworkTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">net_builder:</span> <span class="pre">reagent.net_builder.unions.SyntheticRewardNetBuilder__Union</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_parameters:</span> <span class="pre">reagent.core.parameters.EvaluationParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">state_preprocessing_options:</span> <span class="pre">Optional[reagent.workflow.types.PreprocessingOptions]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">action_preprocessing_options:</span> <span class="pre">Optional[reagent.workflow.types.PreprocessingOptions]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">state_float_features:</span> <span class="pre">Optional[List[Tuple[int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">str]]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">parametric_action_float_features:</span> <span class="pre">Optional[List[Tuple[int</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">str]]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">discrete_action_names:</span> <span class="pre">Optional[List[str]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_seq_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">5</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.model_manager.ModelManager" title="reagent.model_managers.model_manager.ModelManager"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.model_manager.ModelManager</span></code></a></p>
|
|
<p>Train models to attribute single step rewards from sparse/delayed/aggregated rewards.
|
|
Ideas from:
|
|
1. Synthetic Returns for Long-Term Credit Assignment: <a class="reference external" href="https://arxiv.org/pdf/2102.12425.pdf">https://arxiv.org/pdf/2102.12425.pdf</a>
|
|
2. RUDDER: Return Decomposition for Delayed Rewards: <a class="reference external" href="https://arxiv.org/pdf/1806.07857.pdf">https://arxiv.org/pdf/1806.07857.pdf</a>
|
|
3. Optimizing Agent Behavior over Long Time Scales by Transporting Value: <a class="reference external" href="https://arxiv.org/pdf/1810.06721.pdf">https://arxiv.org/pdf/1810.06721.pdf</a>
|
|
4. Sequence Modeling of Temporal Credit Assignment for Episodic Reinforcement Learning: <a class="reference external" href="https://arxiv.org/pdf/1905.13420.pdf">https://arxiv.org/pdf/1905.13420.pdf</a></p>
|
|
<dl class="py property">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.action_feature_config">
|
|
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">action_feature_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.types.ModelFeatureConfig" title="reagent.core.types.ModelFeatureConfig"><span class="pre">reagent.core.types.ModelFeatureConfig</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.action_feature_config" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.action_preprocessing_options">
|
|
<span class="sig-name descname"><span class="pre">action_preprocessing_options</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.PreprocessingOptions" title="reagent.workflow.types.PreprocessingOptions"><span class="pre">reagent.workflow.types.PreprocessingOptions</span></a><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.action_preprocessing_options" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.build_serving_module">
|
|
<span class="sig-name descname"><span class="pre">build_serving_module</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">trainer_module</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.training.html#reagent.training.reagent_lightning_module.ReAgentLightningModule" title="reagent.training.reagent_lightning_module.ReAgentLightningModule"><span class="pre">reagent.training.reagent_lightning_module.ReAgentLightningModule</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">torch.nn.modules.module.Module</span></span></span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.build_serving_module" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Returns a TorchScript predictor module</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.html#reagent.training.reward_network_trainer.RewardNetTrainer" title="reagent.training.reward_network_trainer.RewardNetTrainer"><span 
class="pre">reagent.training.reward_network_trainer.RewardNetTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.discrete_action_names">
|
|
<span class="sig-name descname"><span class="pre">discrete_action_names</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.discrete_action_names" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.eval_parameters">
|
|
<span class="sig-name descname"><span class="pre">eval_parameters</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.EvaluationParameters" title="reagent.core.parameters.EvaluationParameters"><span class="pre">reagent.core.parameters.EvaluationParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.eval_parameters" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.get_data_module">
|
|
<span class="sig-name descname"><span class="pre">get_data_module</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">input_table_spec</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.TableSpec" title="reagent.workflow.types.TableSpec"><span class="pre">reagent.workflow.types.TableSpec</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reader_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.ReaderOptions" title="reagent.workflow.types.ReaderOptions"><span 
class="pre">reagent.workflow.types.ReaderOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">setup_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">bytes</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">saved_setup_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">bytes</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">resource_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span 
class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.ResourceOptions" title="reagent.workflow.types.ResourceOptions"><span class="pre">reagent.workflow.types.ResourceOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.data.html#reagent.data.reagent_data_module.ReAgentDataModule" title="reagent.data.reagent_data_module.ReAgentDataModule"><span class="pre">reagent.data.reagent_data_module.ReAgentDataModule</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.get_data_module" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Return the data module. If this is not None, then <cite>run_feature_identification</cite> &amp;
|
|
<cite>query_data</cite> will not be run.</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.get_reporter">
|
|
<span class="sig-name descname"><span class="pre">get_reporter</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.get_reporter" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.max_seq_len">
|
|
<span class="sig-name descname"><span class="pre">max_seq_len</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">5</span></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.max_seq_len" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.net_builder">
|
|
<span class="sig-name descname"><span class="pre">net_builder</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.net_builder.html#reagent.net_builder.unions.SyntheticRewardNetBuilder__Union" title="reagent.net_builder.unions.SyntheticRewardNetBuilder__Union"><span class="pre">reagent.net_builder.unions.SyntheticRewardNetBuilder__Union</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.net_builder" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.parametric_action_float_features">
|
|
<span class="sig-name descname"><span class="pre">parametric_action_float_features</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.parametric_action_float_features" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py property">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.state_feature_config">
|
|
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">state_feature_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.types.ModelFeatureConfig" title="reagent.core.types.ModelFeatureConfig"><span class="pre">reagent.core.types.ModelFeatureConfig</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.state_feature_config" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.state_float_features">
|
|
<span class="sig-name descname"><span class="pre">state_float_features</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.state_float_features" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.state_preprocessing_options">
|
|
<span class="sig-name descname"><span class="pre">state_preprocessing_options</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.PreprocessingOptions" title="reagent.workflow.types.PreprocessingOptions"><span class="pre">reagent.workflow.types.PreprocessingOptions</span></a><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.state_preprocessing_options" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticReward.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.training.html#reagent.training.parameters.RewardNetworkTrainerParameters" title="reagent.training.parameters.RewardNetworkTrainerParameters"><span class="pre">reagent.training.parameters.RewardNetworkTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticReward.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.synthetic_reward.</span></span><span class="sig-name descname"><span class="pre">SyntheticRewardDataModule</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Any</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.data.html#reagent.data.manual_data_module.ManualDataModule" title="reagent.data.manual_data_module.ManualDataModule"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.data.manual_data_module.ManualDataModule</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.build_batch_preprocessor">
|
|
<span class="sig-name descname"><span class="pre">build_batch_preprocessor</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.build_batch_preprocessor" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.query_data">
|
|
<span class="sig-name descname"><span class="pre">query_data</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_table_spec</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.TableSpec" title="reagent.workflow.types.TableSpec"><span class="pre">reagent.workflow.types.TableSpec</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">sample_range</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">float</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">float</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">data_fetcher</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.data.html#reagent.data.data_fetcher.DataFetcher" title="reagent.data.data_fetcher.DataFetcher"><span class="pre">reagent.data.data_fetcher.DataFetcher</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference 
internal" href="reagent.workflow.html#reagent.workflow.types.Dataset" title="reagent.workflow.types.Dataset"><span class="pre">reagent.workflow.types.Dataset</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.query_data" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Massage input table into the format expected by the trainer</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.run_feature_identification">
|
|
<span class="sig-name descname"><span class="pre">run_feature_identification</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_table_spec</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.TableSpec" title="reagent.workflow.types.TableSpec"><span class="pre">reagent.workflow.types.TableSpec</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.run_feature_identification" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Derive preprocessing parameters from data.</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py property">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.should_generate_eval_dataset">
|
|
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">should_generate_eval_dataset</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">bool</span></em><a class="headerlink" href="#reagent.model_managers.model_based.synthetic_reward.SyntheticRewardDataModule.should_generate_eval_dataset" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
</section>
|
|
<section id="module-reagent.model_managers.model_based.world_model">
|
|
<span id="reagent-model-managers-model-based-world-model-module"></span><h2>reagent.model_managers.model_based.world_model module<a class="headerlink" href="#module-reagent.model_managers.model_based.world_model" title="Permalink to this headline"></a></h2>
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.world_model.WorldModel">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.world_model.</span></span><span class="sig-name descname"><span class="pre">WorldModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reward_boost:</span> <span class="pre">Optional[Dict[str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">float]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.core.parameters.MDNRNNTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.world_model.WorldModel" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.world_model_base.WorldModelBase" title="reagent.model_managers.world_model_base.WorldModelBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.world_model_base.WorldModelBase</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.world_model.WorldModel.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.world_model.html#reagent.training.world_model.mdnrnn_trainer.MDNRNNTrainer" title="reagent.training.world_model.mdnrnn_trainer.MDNRNNTrainer"><span 
class="pre">reagent.training.world_model.mdnrnn_trainer.MDNRNNTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.world_model.WorldModel.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.world_model.WorldModel.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.MDNRNNTrainerParameters" title="reagent.core.parameters.MDNRNNTrainerParameters"><span class="pre">reagent.core.parameters.MDNRNNTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.world_model.WorldModel.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
</section>
|
|
<section id="module-reagent.model_managers.model_based">
|
|
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-reagent.model_managers.model_based" title="Permalink to this headline"></a></h2>
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.CrossEntropyMethod">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.</span></span><span class="sig-name descname"><span class="pre">CrossEntropyMethod</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reward_boost:</span> <span class="pre">Optional[Dict[str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">float]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.core.parameters.CEMTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.CrossEntropyMethod" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.world_model_base.WorldModelBase" title="reagent.model_managers.world_model_base.WorldModelBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.world_model_base.WorldModelBase</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.CrossEntropyMethod.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.html#reagent.training.cem_trainer.CEMTrainer" title="reagent.training.cem_trainer.CEMTrainer"><span 
class="pre">reagent.training.cem_trainer.CEMTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.CrossEntropyMethod.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.CrossEntropyMethod.create_policy">
|
|
<span class="sig-name descname"><span class="pre">create_policy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">trainer_module</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.training.html#reagent.training.reagent_lightning_module.ReAgentLightningModule" title="reagent.training.reagent_lightning_module.ReAgentLightningModule"><span class="pre">reagent.training.reagent_lightning_module.ReAgentLightningModule</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">serving</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span 
class="sig-return-typehint"><a class="reference internal" href="reagent.gym.policies.html#reagent.gym.policies.policy.Policy" title="reagent.gym.policies.policy.Policy"><span class="pre">reagent.gym.policies.policy.Policy</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.CrossEntropyMethod.create_policy" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.CrossEntropyMethod.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.CEMTrainerParameters" title="reagent.core.parameters.CEMTrainerParameters"><span class="pre">reagent.core.parameters.CEMTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.CrossEntropyMethod.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.Seq2RewardModel">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.</span></span><span class="sig-name descname"><span class="pre">Seq2RewardModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reward_boost:</span> <span class="pre">Optional[Dict[str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">float]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">net_builder:</span> <span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">compress_net_builder:</span> <span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.core.parameters.Seq2RewardTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">preprocessing_options:</span> <span class="pre">Optional[reagent.workflow.types.PreprocessingOptions]</span> <span class="pre">=</span> <span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.Seq2RewardModel" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.world_model_base.WorldModelBase" title="reagent.model_managers.world_model_base.WorldModelBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.world_model_base.WorldModelBase</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.Seq2RewardModel.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.world_model.html#reagent.training.world_model.seq2reward_trainer.Seq2RewardTrainer" title="reagent.training.world_model.seq2reward_trainer.Seq2RewardTrainer"><span 
class="pre">reagent.training.world_model.seq2reward_trainer.Seq2RewardTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.Seq2RewardModel.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.Seq2RewardModel.compress_net_builder">
|
|
<span class="sig-name descname"><span class="pre">compress_net_builder</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.net_builder.html#reagent.net_builder.unions.ValueNetBuilder__Union" title="reagent.net_builder.unions.ValueNetBuilder__Union"><span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.Seq2RewardModel.compress_net_builder" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.Seq2RewardModel.get_reporter">
|
|
<span class="sig-name descname"><span class="pre">get_reporter</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.reporting.html#reagent.reporting.seq2reward_reporter.Seq2RewardReporter" title="reagent.reporting.seq2reward_reporter.Seq2RewardReporter"><span class="pre">reagent.reporting.seq2reward_reporter.Seq2RewardReporter</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.Seq2RewardModel.get_reporter" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.Seq2RewardModel.net_builder">
|
|
<span class="sig-name descname"><span class="pre">net_builder</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.net_builder.html#reagent.net_builder.unions.ValueNetBuilder__Union" title="reagent.net_builder.unions.ValueNetBuilder__Union"><span class="pre">reagent.net_builder.unions.ValueNetBuilder__Union</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.Seq2RewardModel.net_builder" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.Seq2RewardModel.preprocessing_options">
|
|
<span class="sig-name descname"><span class="pre">preprocessing_options</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.PreprocessingOptions" title="reagent.workflow.types.PreprocessingOptions"><span class="pre">reagent.workflow.types.PreprocessingOptions</span></a><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.Seq2RewardModel.preprocessing_options" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.Seq2RewardModel.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.Seq2RewardTrainerParameters" title="reagent.core.parameters.Seq2RewardTrainerParameters"><span class="pre">reagent.core.parameters.Seq2RewardTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.Seq2RewardModel.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.</span></span><span class="sig-name descname"><span class="pre">SyntheticReward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.training.parameters.RewardNetworkTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">net_builder:</span> <span class="pre">reagent.net_builder.unions.SyntheticRewardNetBuilder__Union</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">eval_parameters:</span> <span class="pre">reagent.core.parameters.EvaluationParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">state_preprocessing_options:</span> <span class="pre">Optional[reagent.workflow.types.PreprocessingOptions]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">action_preprocessing_options:</span> <span class="pre">Optional[reagent.workflow.types.PreprocessingOptions]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">state_float_features:</span> <span class="pre">Optional[List[Tuple[int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">str]]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">parametric_action_float_features:</span> <span class="pre">Optional[List[Tuple[int</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">str]]]</span> 
<span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">discrete_action_names:</span> <span class="pre">Optional[List[str]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_seq_len:</span> <span class="pre">int</span> <span class="pre">=</span> <span class="pre">5</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.model_manager.ModelManager" title="reagent.model_managers.model_manager.ModelManager"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.model_manager.ModelManager</span></code></a></p>
|
|
<p>Train models to attribute single step rewards from sparse/delayed/aggregated rewards.
|
|
Ideas from:
|
|
1. Synthetic Returns for Long-Term Credit Assignment: <a class="reference external" href="https://arxiv.org/pdf/2102.12425.pdf">https://arxiv.org/pdf/2102.12425.pdf</a>
|
|
2. RUDDER: Return Decomposition for Delayed Rewards: <a class="reference external" href="https://arxiv.org/pdf/1806.07857.pdf">https://arxiv.org/pdf/1806.07857.pdf</a>
|
|
3. Optimizing Agent Behavior over Long Time Scales by Transporting Value: <a class="reference external" href="https://arxiv.org/pdf/1810.06721.pdf">https://arxiv.org/pdf/1810.06721.pdf</a>
|
|
4. Sequence Modeling of Temporal Credit Assignment for Episodic Reinforcement Learning: <a class="reference external" href="https://arxiv.org/pdf/1905.13420.pdf">https://arxiv.org/pdf/1905.13420.pdf</a></p>
|
|
<dl class="py property">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.action_feature_config">
|
|
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">action_feature_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.types.ModelFeatureConfig" title="reagent.core.types.ModelFeatureConfig"><span class="pre">reagent.core.types.ModelFeatureConfig</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.action_feature_config" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.action_preprocessing_options">
|
|
<span class="sig-name descname"><span class="pre">action_preprocessing_options</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.PreprocessingOptions" title="reagent.workflow.types.PreprocessingOptions"><span class="pre">reagent.workflow.types.PreprocessingOptions</span></a><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.action_preprocessing_options" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.build_serving_module">
|
|
<span class="sig-name descname"><span class="pre">build_serving_module</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">trainer_module</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="reagent.training.html#reagent.training.reagent_lightning_module.ReAgentLightningModule" title="reagent.training.reagent_lightning_module.ReAgentLightningModule"><span class="pre">reagent.training.reagent_lightning_module.ReAgentLightningModule</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">torch.nn.modules.module.Module</span></span></span><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.build_serving_module" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Returns a TorchScript predictor module</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.html#reagent.training.reward_network_trainer.RewardNetTrainer" title="reagent.training.reward_network_trainer.RewardNetTrainer"><span 
class="pre">reagent.training.reward_network_trainer.RewardNetTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.discrete_action_names">
|
|
<span class="sig-name descname"><span class="pre">discrete_action_names</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.discrete_action_names" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.eval_parameters">
|
|
<span class="sig-name descname"><span class="pre">eval_parameters</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.EvaluationParameters" title="reagent.core.parameters.EvaluationParameters"><span class="pre">reagent.core.parameters.EvaluationParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.eval_parameters" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.get_data_module">
|
|
<span class="sig-name descname"><span class="pre">get_data_module</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">input_table_spec</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.TableSpec" title="reagent.workflow.types.TableSpec"><span class="pre">reagent.workflow.types.TableSpec</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reader_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.ReaderOptions" title="reagent.workflow.types.ReaderOptions"><span 
class="pre">reagent.workflow.types.ReaderOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">setup_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">bytes</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">saved_setup_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">bytes</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">resource_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span 
class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.ResourceOptions" title="reagent.workflow.types.ResourceOptions"><span class="pre">reagent.workflow.types.ResourceOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.data.html#reagent.data.reagent_data_module.ReAgentDataModule" title="reagent.data.reagent_data_module.ReAgentDataModule"><span class="pre">reagent.data.reagent_data_module.ReAgentDataModule</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.get_data_module" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Return the data module. If this is not None, then <cite>run_feature_identification</cite> &amp;
|
|
<cite>query_data</cite> will not be run.</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.get_reporter">
|
|
<span class="sig-name descname"><span class="pre">get_reporter</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.get_reporter" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.max_seq_len">
|
|
<span class="sig-name descname"><span class="pre">max_seq_len</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">int</span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">5</span></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.max_seq_len" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.net_builder">
|
|
<span class="sig-name descname"><span class="pre">net_builder</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.net_builder.html#reagent.net_builder.unions.SyntheticRewardNetBuilder__Union" title="reagent.net_builder.unions.SyntheticRewardNetBuilder__Union"><span class="pre">reagent.net_builder.unions.SyntheticRewardNetBuilder__Union</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.net_builder" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.parametric_action_float_features">
|
|
<span class="sig-name descname"><span class="pre">parametric_action_float_features</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.parametric_action_float_features" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py property">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.state_feature_config">
|
|
<em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">state_feature_config</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.types.ModelFeatureConfig" title="reagent.core.types.ModelFeatureConfig"><span class="pre">reagent.core.types.ModelFeatureConfig</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.state_feature_config" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.state_float_features">
|
|
<span class="sig-name descname"><span class="pre">state_float_features</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">Tuple</span><span class="p"><span class="pre">[</span></span><span class="pre">int</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.state_float_features" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.state_preprocessing_options">
|
|
<span class="sig-name descname"><span class="pre">state_preprocessing_options</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.PreprocessingOptions" title="reagent.workflow.types.PreprocessingOptions"><span class="pre">reagent.workflow.types.PreprocessingOptions</span></a><span class="p"><span class="pre">]</span></span></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.state_preprocessing_options" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.SyntheticReward.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.training.html#reagent.training.parameters.RewardNetworkTrainerParameters" title="reagent.training.parameters.RewardNetworkTrainerParameters"><span class="pre">reagent.training.parameters.RewardNetworkTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.SyntheticReward.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
<dl class="py class">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.WorldModel">
|
|
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">reagent.model_managers.model_based.</span></span><span class="sig-name descname"><span class="pre">WorldModel</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reward_boost:</span> <span class="pre">Optional[Dict[str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">float]]</span> <span class="pre">=</span> <span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">trainer_param:</span> <span class="pre">reagent.core.parameters.MDNRNNTrainerParameters</span> <span class="pre">=</span> <span class="pre">&lt;factory&gt;</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#reagent.model_managers.model_based.WorldModel" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Bases: <a class="reference internal" href="reagent.model_managers.html#reagent.model_managers.world_model_base.WorldModelBase" title="reagent.model_managers.world_model_base.WorldModelBase"><code class="xref py py-class docutils literal notranslate"><span class="pre">reagent.model_managers.world_model_base.WorldModelBase</span></code></a></p>
|
|
<dl class="py method">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.WorldModel.build_trainer">
|
|
<span class="sig-name descname"><span class="pre">build_trainer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">normalization_data_map</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.NormalizationData" title="reagent.core.parameters.NormalizationData"><span class="pre">reagent.core.parameters.NormalizationData</span></a><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">use_gpu</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reward_options</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="reagent.workflow.html#reagent.workflow.types.RewardOptions" title="reagent.workflow.types.RewardOptions"><span class="pre">reagent.workflow.types.RewardOptions</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">→</span> <span class="sig-return-typehint"><a class="reference internal" href="reagent.training.world_model.html#reagent.training.world_model.mdnrnn_trainer.MDNRNNTrainer" title="reagent.training.world_model.mdnrnn_trainer.MDNRNNTrainer"><span 
class="pre">reagent.training.world_model.mdnrnn_trainer.MDNRNNTrainer</span></a></span></span><a class="headerlink" href="#reagent.model_managers.model_based.WorldModel.build_trainer" title="Permalink to this definition"></a></dt>
|
|
<dd><p>Implement this to build the trainer, given the config</p>
|
|
<p>TODO: This function should return ReAgentLightningModule &amp;
|
|
the dictionary of modules created</p>
|
|
</dd></dl>
|
|
|
|
<dl class="py attribute">
|
|
<dt class="sig sig-object py" id="reagent.model_managers.model_based.WorldModel.trainer_param">
|
|
<span class="sig-name descname"><span class="pre">trainer_param</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="reagent.core.html#reagent.core.parameters.MDNRNNTrainerParameters" title="reagent.core.parameters.MDNRNNTrainerParameters"><span class="pre">reagent.core.parameters.MDNRNNTrainerParameters</span></a></em><a class="headerlink" href="#reagent.model_managers.model_based.WorldModel.trainer_param" title="Permalink to this definition"></a></dt>
|
|
<dd></dd></dl>
|
|
|
|
</dd></dl>
|
|
|
|
</section>
|
|
</section>
|
|
|
|
|
|
</div>
|
|
</div>
|
|
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
|
<a href="reagent.model_managers.discrete.html" class="btn btn-neutral float-left" title="reagent.model_managers.discrete package" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
|
<a href="reagent.model_managers.parametric.html" class="btn btn-neutral float-right" title="reagent.model_managers.parametric package" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
|
</div>
|
|
|
|
<hr/>
|
|
|
|
<div role="contentinfo">
|
|
<p>© Copyright 2022, Meta Platforms, Inc.</p>
|
|
</div>
|
|
|
|
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
|
|
|
|
</footer>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
</div>
|
|
<script>
|
|
jQuery(function () {
|
|
SphinxRtdTheme.Navigation.enable(true);
|
|
});
|
|
</script>
|
|
|
|
</body>
|
|
</html> |