mirror of
https://github.com/facebookresearch/ReAgent.git
synced 2026-05-17 12:40:39 +00:00
937720fe97
Summary: Pull Request resolved: https://github.com/facebookresearch/ReAgent/pull/348 Test Plan: This code passes flow-cli canary rl.workflow.test.test_open_ai_gym_offline.test_gym_discrete_crr@reinforcement_learning --parameters-file=fblearner/flow/projects/rl/configs/gym_offline/test_gym_cartpole_crr.json --mode opt --entitlement=gpu_prod --run-as-secure-group reinforcement_learning --force-build Reviewed By: kittipatv Differential Revision: D25321153 Pulled By: DavidV17 fbshipit-source-id: f45c0763f4554dedee5a5f299141043adb679572
76 lines
2.7 KiB
Python
76 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
|
|
|
from typing import Dict, Tuple
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
from reagent.parameters import NormalizationParameters
|
|
from reagent.preprocessing.identify_types import (
|
|
CONTINUOUS_ACTION,
|
|
DISCRETE_ACTION,
|
|
DO_NOT_PREPROCESS,
|
|
)
|
|
from reagent.preprocessing.normalization import EPS, get_num_output_features
|
|
|
|
|
|
class Postprocessor(nn.Module):
    """
    Invert action normalization, mapping model outputs back to serving values.

    For ``CONTINUOUS_ACTION`` features, ``forward`` clamps its input to
    ``[-(1 - EPS), 1 - EPS]`` and rescales it linearly so that the clamp
    bounds map onto each feature's ``min_value`` and ``max_value``. For
    ``DISCRETE_ACTION`` and ``DO_NOT_PREPROCESS`` features the input is
    returned unchanged.
    """

    def __init__(
        self,
        normalization_parameters: Dict[int, NormalizationParameters],
        use_gpu: bool,
    ) -> None:
        """
        Args:
            normalization_parameters: Normalization parameters keyed by
                feature id. Every entry must have the same ``feature_type``.
                Per-feature constants are laid out in ascending feature-id
                order.
            use_gpu: When True the constants are created on ``"cuda"``,
                otherwise on ``"cpu"``.

        Raises:
            AssertionError: If the entries do not share exactly one feature
                type, or if that type is not one of DISCRETE_ACTION,
                CONTINUOUS_ACTION or DO_NOT_PREPROCESS.
        """
        super().__init__()
        self.num_output_features = get_num_output_features(normalization_parameters)
        feature_types = {
            norm_param.feature_type for norm_param in normalization_parameters.values()
        }
        assert (
            len(feature_types) == 1
        ), "All dimensions of actions should have the same preprocessing"
        # Exactly one element is guaranteed by the assertion above.
        self.feature_type = next(iter(feature_types))
        assert self.feature_type in {
            DISCRETE_ACTION,
            CONTINUOUS_ACTION,
            DO_NOT_PREPROCESS,
        }, f"{self.feature_type} is not DISCRETE_ACTION, CONTINUOUS_ACTION or DO_NOT_PREPROCESS"

        self.device = torch.device("cuda" if use_gpu else "cpu")

        # Only continuous actions need rescaling constants; the other feature
        # types pass through forward() untouched.
        # NOTE: these tensors are plain attributes rather than registered
        # buffers, so they stay on self.device even if the module is later
        # moved with .to().
        if self.feature_type == CONTINUOUS_ACTION:
            sorted_features = sorted(normalization_parameters.keys())
            self.min_serving_value = torch.tensor(
                [normalization_parameters[f].min_value for f in sorted_features],
                device=self.device,
            ).float()
            # (max - min) / (2 * (1 - EPS)): stretches the clamped, shifted
            # range [0, 2 * (1 - EPS)] onto [0, max - min].
            self.scaling_factor = torch.tensor(
                [
                    (
                        # pyre-fixme[58]: `-` is not supported for operand types
                        #  `Optional[float]` and `Optional[float]`.
                        normalization_parameters[f].max_value
                        - normalization_parameters[f].min_value
                    )
                    / (2 * (1 - EPS))
                    for f in sorted_features
                ],
                device=self.device,
            ).float()
            self.almost_one = torch.tensor(1.0 - EPS, device=self.device).float()

    def input_prototype(self) -> Tuple[torch.Tensor]:
        """
        Return a sample input for ``forward``.

        The result is a one-element tuple holding a random tensor of shape
        ``(1, self.num_output_features)``. It is created on ``self.device`` so
        that it can be combined with the constants built in ``__init__``.
        """
        return (torch.randn(1, self.num_output_features, device=self.device),)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """
        Map ``input`` back to serving values.

        For CONTINUOUS_ACTION the input is clamped to
        ``[-almost_one, almost_one]``, shifted by ``almost_one``, multiplied
        by ``scaling_factor`` and offset by ``min_serving_value``. For every
        other feature type ``input`` is returned as is.
        """
        if self.feature_type == CONTINUOUS_ACTION:
            # Please don't re-order; ONNX messed up tensor type when torch.clamp is
            # the first operand.
            return (
                self.almost_one + torch.clamp(input, -self.almost_one, self.almost_one)
            ) * self.scaling_factor + self.min_serving_value
        return input
|