Upgrade ReAgent to use Python 3.8 (#415)

Summary: Pull Request resolved: https://github.com/facebookresearch/ReAgent/pull/415 Currently, we have some test failures (https://app.circleci.com/pipelines/github/facebookresearch/ReAgent/1460/workflows/ecc21254-779b-4a89-a40d-ea317e839d96/jobs/8655) because we are missing some of the latest features. Reviewed By: MisterTea Differential Revision: D26977836 fbshipit-source-id: 9243d194ddf5c62895c9f1369830309c379fd7dd
2026-05-17 12:40:39 +00:00 · 2021-06-09 19:41:41 -07:00
parent ae12656790
commit cae245749a
14 changed files with 337 additions and 144 deletions
@@ -83,15 +83,17 @@ commands:
          name: Installing SDKs
          command: |
            mv ~/.bashrc ~/.bashrc.bk
+            sudo apt-get update
+            sudo apt-get install bc
+            sudo apt-get install unzip
+            sudo apt-get install zip
            curl -s "https://get.sdkman.io" | bash
            source "$HOME/.sdkman/bin/sdkman-init.sh"
            sdk version
            sdk install java 8.0.272.hs-adpt
            sdk install scala
            sdk install maven
-            sdk install spark 2.4.6
-            sudo apt-get update
-            sudo apt-get install bc
+            sdk install spark 3.1.1
      - run:
          name: Build preprocessing package
          command: |
@@ -140,12 +142,16 @@ commands:
          steps:
            - run:
                command: |
-                  pyenv global 3.7.0
+                  pyenv install -v 3.8.1
+                  pyenv global 3.8.1
      - run:
          command: |
-            pip install --upgrade pip
-            pip install tox==3.20.1
-            pip install --upgrade wheel setuptools
+            sudo apt update
+            sudo apt install cmake
+            sudo apt install swig
+            pip install --upgrade pip --progress-bar off
+            pip install --upgrade wheel setuptools --progress-bar off
+            pip install tox==3.20.1 --progress-bar off
      - when:
          condition: << parameters.install_gym >>
          steps:
@@ -154,13 +160,13 @@ commands:
                steps:
                  - run:
                      command: |
-                        pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html
+                        pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html --progress-bar off
            - unless:
                condition: << parameters.is_ubuntu_gpu >>
                steps:
                  - run:
                      command: |
-                        sudo pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+                        sudo pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html --progress-bar off

  run_unittest:
    description: Run unittests, coverage and save results
@@ -171,14 +177,14 @@ commands:
      - run:
          no_output_timeout: 30m
          command: |
-            tox -vv -e << parameters.tox_env >>
+            tox -v -e << parameters.tox_env >>
            bash <(curl -s https://codecov.io/bash)
-      - run: python setup.py bdist_wheel
+      - run: python setup.py -q bdist_wheel
      - store_artifacts:
          path: dist/reagent-0.1-py3-none-any.whl
          destination: reagent-0.1-py3-none-any.whl
      - store_test_results:
-          path: .tox/py37/log/
+          path: .tox/py38/log/

  run_interrogate:
    description: Install and run interrogate
@@ -186,16 +192,16 @@ commands:
      - run:
          name: Install interrogate
          command: |
-            pip install interrogate
+            pip install interrogate --progress-bar off
      - run:
          name: Run interrogate on reagent code base
          command: |
            interrogate -piImvv -f 15 reagent/

 jobs:
-  gpu_unittest:
+  misc_unittest:
    machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
    resource_class: gpu.medium
    environment:
      - CUDA_LAUNCH_BLOCKING: 1
@@ -205,11 +211,47 @@ jobs:
          install_gym: false
          is_ubuntu_gpu: true
      - run_unittest:
-          tox_env: circleci_unittest
+          tox_env: circleci_misc_unittest

-  gym_unittest:
+  gym_cpu_unittest:
    machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: large
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_cpu_unittest
+
+  gym_replay_buffer_cpu_unittest_1:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: large
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_1_cpu_unittest
+
+  gym_replay_buffer_cpu_unittest_2:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: large
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_2_cpu_unittest
+
+  gym_gpu_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
    resource_class: gpu.medium
    environment:
      - CUDA_LAUNCH_BLOCKING: 1
@@ -219,11 +261,39 @@ jobs:
          install_gym: false
          is_ubuntu_gpu: true
      - run_unittest:
-          tox_env: circleci_gym_unittest
+          tox_env: circleci_gym_gpu_unittest
+
+  gym_replay_buffer_gpu_unittest_1:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_1_gpu_unittest
+
+  gym_replay_buffer_gpu_unittest_2:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_2_gpu_unittest

  dqn_cartpole_e2e:
    docker:
-      - image: circleci/python:3.7
+      - image: circleci/python:3.8
    resource_class: large
    environment:
      - BASH_ENV: ~/.bashrc
@@ -235,9 +305,9 @@ jobs:
          is_ubuntu_gpu: false
      - end_to_end_test

-  seq2slate_e2e:
+  ranking_unittest:
    machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
    resource_class: gpu.medium
    environment:
      - CUDA_LAUNCH_BLOCKING: 1
@@ -247,11 +317,53 @@ jobs:
          install_gym: true
          is_ubuntu_gpu: true
      - run_unittest:
-          tox_env: circleci_seq2slate_unittest
+          tox_env: circleci_ranking_unittest
+
+  training_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: true
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_training_unittest
+
+  prediction_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: true
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_prediction_unittest
+
+  world_model_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: true
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_world_model_unittest

  sac_pendulum_e2e:
    docker:
-      - image: circleci/python:3.7
+      - image: circleci/python:3.8
    resource_class: large
    environment:
      - BASH_ENV: ~/.bashrc
@@ -265,7 +377,7 @@ jobs:

  sac_pendulum_e2e_gpu:
    machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
    resource_class: gpu.medium
    environment:
      - CONFIG: reagent/workflow/sample_configs/sac_pendulum_offline.yaml
@@ -301,7 +413,7 @@ jobs:

  docstring_coverage:
    docker:
-      - image: circleci/python:3.7
+      - image: circleci/python:3.8
    resource_class: small
    steps:
      - checkout_merge
@@ -310,12 +422,20 @@ jobs:
 workflows:
  build:
    jobs:
-      - seq2slate_e2e
+      - ranking_unittest
+      - training_unittest
+      - prediction_unittest
+      - world_model_unittest
      - dqn_cartpole_e2e
      - sac_pendulum_e2e
      - sac_pendulum_e2e_gpu
-      - gpu_unittest
-      - gym_unittest
+      - misc_unittest
+      - gym_cpu_unittest
+      - gym_gpu_unittest
+      - gym_replay_buffer_cpu_unittest_1
+      - gym_replay_buffer_cpu_unittest_2
+      - gym_replay_buffer_gpu_unittest_1
+      - gym_replay_buffer_gpu_unittest_2
      - rasp_test_linux
      - rasp_test_mac
      - docstring_coverage
@@ -7,7 +7,7 @@ ReAgent CLI & Python API
 ^^^^^^^^^^^^^^^^^^^^^^^^

 We provide a CLI to launch training and a Python API for programmatic use, e.g., in your own script or Jupyter Notebook.
-To install this component, you will need to have Python 3.7+ installed on your system.
+To install this component, you will need to have Python 3.8+ installed on your system.
 If you don't have that, you can either install it via `pyenv <https://github.com/pyenv/pyenv>`_ or
 `conda <https://docs.conda.io/projects/conda/en/latest/index.html>`_. To verify that you have the right version,
 type the following command in your shell:
@@ -24,7 +24,7 @@ Once you make sure you have the right version, you can simply clone this repo an
   cd ReAgent
   pip install ".[gym]"

-   # install nightly torch (change cpu to cu101/102 if fit)
+   # install nightly torch (replace cpu with cu102 for a CUDA 10.2 build)
   pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html

 If you don't need the gym dependencies, you can remove :code:`[gym]`
@@ -49,7 +49,7 @@ To build from source, you'll need JDK, Scala, & Maven. We will use `SDKMAN! <htt
   curl -s "https://get.sdkman.io" | bash
   source "$HOME/.sdkman/bin/sdkman-init.sh"
   sdk version
-   sdk install java 8.0.265.hs-adpt
+   sdk install java 8.0.272.hs-adpt
   sdk install scala
   sdk install maven

@@ -57,7 +57,7 @@ If you are testing locally, you can also install Spark

 .. code-block:: bash

-   sdk install spark 2.4.6
+   sdk install spark 3.1.1

 Now, you can build our preprocessing JAR

@@ -37,13 +37,13 @@
      provided
    </parquet.deps.scope>
    <scala.version>
-      2.11.7
+      2.12.10
    </scala.version>
    <scala.binary.version>
-      2.11
+      2.12
    </scala.binary.version>
    <spark.version>
-      2.3.2
+      3.1.1
    </spark.version>
  </properties>
  <dependencies>
@@ -55,7 +55,7 @@
        scalatest_${scala.binary.version}
      </artifactId>
      <version>
-        2.2.6
+        3.2.5
      </version>
      <scope>
        test
@@ -69,7 +69,7 @@
        jacoco-maven-plugin
      </artifactId>
      <version>
-        0.8.5
+        0.8.6
      </version>
      <scope>
        test
@@ -97,7 +97,7 @@
        scalacheck_${scala.binary.version}
      </artifactId>
      <version>
-        1.13.5
+        1.14.1
      </version>
      <scope>
        test
@@ -293,7 +293,7 @@
          scala-maven-plugin
        </artifactId>
        <version>
-          3.2.2
+          4.4.1
        </version>
        <executions>
          <execution>
@@ -128,10 +128,18 @@ object Timeline {
      sqlContext: SQLContext,
      config: TimelineConfiguration
  ): Unit = {
-    var filterTerminal = "HAVING next_state_features IS NOT NULL";
+    var filterTerminal = "WHERE next_state_features IS NOT NULL";
    if (config.addTerminalStateRow) {
      filterTerminal = "";
    }
+    var filterTimeLimit = "";
+    if (config.timeWindowLimit != None) {
+      if (filterTerminal == "") {
+        filterTimeLimit = s"WHERE time_since_first <= ${config.timeWindowLimit.get}";
+      } else {
+        filterTimeLimit = s" AND time_since_first <= ${config.timeWindowLimit.get}";
+      }
+    }

    val actionDataType =
      Helper.getDataTypes(sqlContext, config.inputTableName, List("action"))("action")
@@ -193,23 +201,6 @@ object Timeline {
      case (acc, (k, v)) => s"${acc}, a.${k}"
    }

-    val timeLimitedSourceTable = config.timeWindowLimit
-      .map { timeLimit =>
-        s"""
-        , time_limited_source_table AS (
-            SELECT
-                *,
-                sequence_number - FIRST(sequence_number) OVER (
-                     PARTITION BY mdp_id
-                     ORDER BY mdp_id, sequence_number
-                ) AS time_since_first
-            FROM source_table
-            HAVING time_since_first <= ${timeLimit}
-        )
-        """.stripMargin
-      }
-      .getOrElse("")
-
    val sourceTable = s"""
    WITH ${mdpFilter}
        source_table AS (
@@ -225,15 +216,8 @@ object Timeline {
            ${joinClause}
            a.ds BETWEEN '${config.startDs}' AND '${config.endDs}'
        )
-        ${timeLimitedSourceTable}
    """.stripMargin

-    val sourceTableName = config.timeWindowLimit
-      .map { _ =>
-        "time_limited_source_table"
-      }
-      .getOrElse("source_table")
-
    val rewardColumnsQuery = rewardColumnDataTypes.foldLeft("") {
      case (acc, (k, v)) => s"${acc}, ${k}"
    }
@@ -253,53 +237,59 @@ object Timeline {
    }

    val sqlCommand = s"""
-    ${sourceTable}
+    ${sourceTable},
+    joined_table AS (
+      SELECT
+          mdp_id,
+          state_features,
+          action,
+          LEAD(action) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS next_action,
+          action_probability
+          ${rewardColumnsQuery},
+          LEAD(state_features) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS next_state_features,
+          sequence_number,
+          ROW_NUMBER() OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS sequence_number_ordinal,
+          COALESCE(LEAD(sequence_number) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ), sequence_number) - sequence_number AS time_diff,
+          sequence_number - FIRST(sequence_number) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS time_since_first
+          ${timelineJoinColumnsQuery}
+      FROM source_table
+      CLUSTER BY HASH(mdp_id, sequence_number)
+    )
    SELECT
-        mdp_id,
-        state_features,
-        action,
-        LEAD(action) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS next_action,
-        action_probability
-        ${rewardColumnsQuery},
-        LEAD(state_features) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS next_state_features,
-        sequence_number,
-        ROW_NUMBER() OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS sequence_number_ordinal,
-        COALESCE(LEAD(sequence_number) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ), sequence_number) - sequence_number AS time_diff,
-        sequence_number - FIRST(sequence_number) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS time_since_first
-        ${timelineJoinColumnsQuery}
-    FROM ${sourceTableName}
+      *
+    FROM joined_table
    ${filterTerminal}
-    CLUSTER BY HASH(mdp_id, sequence_number)
+    ${filterTimeLimit}
    """.stripMargin
    log.info("Executing query: ")
    log.info(sqlCommand)
@@ -12,12 +12,13 @@ import org.apache.spark.sql.functions.col
 import org.apache.spark.sql._
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql.types._
-import org.scalatest.{BeforeAndAfterAll, FunSuiteLike, Suite}
+import org.scalatest.{BeforeAndAfterAll, Suite}
+import org.scalatest.funsuite.AnyFunSuiteLike

 import scala.collection.mutable
 import scala.math.abs

-trait PipelineTester extends FunSuiteLike with BeforeAndAfterAll with TestLogging { this: Suite =>
+trait PipelineTester extends AnyFunSuiteLike with BeforeAndAfterAll with TestLogging { this: Suite =>

  @transient private var _sparkContext: SparkContext = _
  def sparkContext: SparkContext = _sparkContext
@@ -10,8 +10,8 @@ import org.scalatest._
 import scala.collection.JavaConversions._
 import scala.util.Try

-trait TestLogging extends BeforeAndAfterAll with BeforeAndAfterEach with TestLogger {
-  this: Suite =>
+trait TestLogging extends BeforeAndAfterAll with BeforeAndAfterEach with TestLogger with TestSuiteMixin {
+  this: TestSuite =>

  private val logLayout = new EnhancedPatternLayout("%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n")

@@ -72,6 +72,7 @@ trait TestLogging extends BeforeAndAfterAll with BeforeAndAfterEach with TestLog
      val scopes = test.scopes
      val text = test.text
      val tags = test.tags
+      val pos = test.pos
    }

    super.withFixture(wrappedTest)
@@ -27,9 +27,9 @@ model:
        - leaky_relu
    eval_parameters:
      calc_cpe_in_training: false
-replay_memory_size: 20000
+replay_memory_size: 100000
 train_every_ts: 1
-train_after_ts: 5000
+train_after_ts: 20000
 num_train_episodes: 30
 num_eval_episodes: 20
 passing_score_bar: 100.0
@@ -4,6 +4,7 @@ import logging
 import os
 import pprint
 import unittest
+import uuid
 from typing import Optional, Dict, Any

 import numpy as np
@@ -48,7 +49,7 @@ Format path to be: "configs/<env_name>/<model_name>_<env_name>_online.yaml."
 NOTE: These tests should ideally finish quickly (within 10 minutes) since they are
 unit tests which are run many times.
 """
-REPLAY_BUFFER_GYM_TESTS = [
+REPLAY_BUFFER_GYM_TESTS_1 = [
    ("Discrete CRR Cartpole", "configs/cartpole/discrete_crr_cartpole_online.yaml"),
    ("Discrete DQN Cartpole", "configs/cartpole/discrete_dqn_cartpole_online.yaml"),
    ("Discrete C51 Cartpole", "configs/cartpole/discrete_c51_cartpole_online.yaml"),
@@ -58,6 +59,8 @@ REPLAY_BUFFER_GYM_TESTS = [
        "configs/open_gridworld/discrete_dqn_open_gridworld.yaml",
    ),
    ("SAC Pendulum", "configs/pendulum/sac_pendulum_online.yaml"),
+]
+REPLAY_BUFFER_GYM_TESTS_2 = [
    ("Continuous CRR Pendulum", "configs/pendulum/continuous_crr_pendulum_online.yaml"),
    ("TD3 Pendulum", "configs/pendulum/td3_pendulum_online.yaml"),
    ("Parametric DQN Cartpole", "configs/cartpole/parametric_dqn_cartpole_online.yaml"),
@@ -91,8 +94,16 @@ curr_dir = os.path.dirname(__file__)

 class TestGym(HorizonTestBase):
    # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
-    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS)
-    def test_replay_buffer_gym_cpu(self, name: str, config_path: str):
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_1)
+    def test_replay_buffer_gym_cpu_1(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_cpu(name, config_path)
+
+    # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_2)
+    def test_replay_buffer_gym_cpu_2(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_cpu(name, config_path)
+
+    def _test_replay_buffer_gym_cpu(self, name: str, config_path: str):
        logger.info(f"Starting {name} on CPU")
        self.run_from_config(
            run_test=run_test_replay_buffer,
@@ -102,10 +113,20 @@ class TestGym(HorizonTestBase):
        logger.info(f"{name} passes!")

    # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
-    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS)
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_1)
    @pytest.mark.serial
    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
-    def test_replay_buffer_gym_gpu(self, name: str, config_path: str):
+    def test_replay_buffer_gym_gpu_1(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_gpu(name, config_path)
+
+    # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_2)
+    @pytest.mark.serial
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_replay_buffer_gym_gpu_2(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_gpu(name, config_path)
+
+    def _test_replay_buffer_gym_gpu(self, name: str, config_path: str):
        logger.info(f"Starting {name} on GPU")
        self.run_from_config(
            run_test=run_test_replay_buffer,
@@ -263,7 +284,12 @@ def run_test_replay_buffer(
        device=device,
    )
    data_loader = torch.utils.data.DataLoader(dataset, collate_fn=identity_collate)
-    pl_trainer = pl.Trainer(max_epochs=1, gpus=int(use_gpu))
+    pl_trainer = pl.Trainer(
+        max_epochs=1,
+        gpus=int(use_gpu),
+        deterministic=True,
+        default_root_dir=f"lightning_log_{str(uuid.uuid4())}",
+    )
    # Note: the fit() function below also evaluates the agent along the way
    # and adds the new transitions to the replay buffer, so it is training
    # on incrementally larger and larger buffers.
@@ -311,7 +337,12 @@ def run_test_online_episode(
    agent = Agent.create_for_env(env, policy, device=device)

    if isinstance(trainer, pl.LightningModule):
-        pl_trainer = pl.Trainer(max_epochs=1, gpus=int(use_gpu), deterministic=True)
+        pl_trainer = pl.Trainer(
+            max_epochs=1,
+            gpus=int(use_gpu),
+            deterministic=True,
+            default_root_dir=f"lightning_log_{str(uuid.uuid4())}",
+        )
        dataset = EpisodicDataset(
            env=env, agent=agent, num_episodes=num_train_episodes, seed=SEED
        )
@@ -64,7 +64,7 @@ def replay_buffer_to_pre_timeline_df(
        "ds": [DEFAULT_DS for _ in range(n)],
        "state_features": _dense_to_sparse(batch.state),
        "action": action,
-        "mdp_id": batch.mdp_id.tolist(),
+        "mdp_id": list(map(str, batch.mdp_id.flatten().tolist())),
        "sequence_number": sequence_number,
        "action_probability": action_probability,
        "reward": reward,
@@ -121,7 +121,8 @@ def train_and_eval(trainer, data, num_eval_batches=100, max_epochs=1):
    train_dataloader = DataLoader(data[:-num_eval_batches], collate_fn=lambda x: x[0])
    eval_data = data[-num_eval_batches:]

-    pl_trainer = pl.Trainer(max_epochs=max_epochs)
+    # disable logging in tests
+    pl_trainer = pl.Trainer(max_epochs=max_epochs, logger=False)
    pl_trainer.fit(trainer, train_dataloader)

    total_loss = 0
@@ -262,8 +262,7 @@ def train_workflow(

    output_paths = {}
    for module_name, serving_module in model_manager.build_serving_modules().items():
-        # TODO: make this a parameter
-        torchscript_output_path = f"model_{round(time.time())}.torchscript"
+        torchscript_output_path = f"{model_manager.__class__.__name__}_{module_name}_{round(time.time())}.torchscript"
        torch.jit.save(serving_module, torchscript_output_path)
        logger.info(f"Saved {module_name} to {torchscript_output_path}")
        output_paths[module_name] = torchscript_output_path
@@ -1 +1 @@
-python>=3.7
+python>=3.8
@@ -10,7 +10,7 @@ license = BSD 3-Clause License

 [options]
 packages = find:
-python_requires = >=3.7
+python_requires = >=3.8
 install_requires =
  click>=7.0
  # ~=1.2.0 for compatibility with gym
@@ -25,7 +25,7 @@ install_requires =
  tqdm>=4.46.0
  petastorm>=0.9.0
  parameterized>=0.7.4
-  pyspark==2.4.6
+  pyspark==3.1.1
  pytorch-lightning==1.1.5
  ruamel.yaml>=0.15.99
  scipy>=1.3.1
@@ -3,13 +3,16 @@
 # test suite on all supported python versions. To use it, "pip install tox"
 # and then run "tox" from this directory.

-[tox]
-envlist = py37
+# This post explains how to select specific tests to run by name pattern:
+# https://stackoverflow.com/questions/36456920/is-there-a-way-to-specify-which-pytest-tests-to-run-from-a-file

-# install CUDA 10.1 Torch
+[tox]
+envlist = py38
+
+# install CUDA 10.2 Torch
 [ubuntu_gpu]
 install_command =
-    pip install --pre -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html {opts} {packages}
+    pip install --pre -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html {opts} {packages} --progress-bar off

 [pytest]
 addopts = --verbose -d --tx popen --cov=reagent --cov-report=xml --cov-append --junitxml={envlogdir}/junit-{envname}.xml
@@ -25,25 +28,72 @@ extras =
    gym
    test
 install_command =
-    pip install --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html {opts} {packages}
+    pip install --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html {opts} {packages} --progress-bar off
 commands =
-    pytest -n 4 -m "(not serial) and (not seq2slate_long)"
+    pytest -n2 -m "(not serial) and (not seq2slate_long)"
    pytest -n0 -m "serial"

-[testenv:circleci_unittest]
+[testenv:circleci_misc_unittest]
 install_command = {[ubuntu_gpu]install_command}
 commands =
-    pytest reagent/test -n auto -m "(not serial) and (not seq2slate_long)"
-    pytest reagent/test -n0 -m "serial"
+    pytest reagent/test -n2 -m "not serial" --ignore=reagent/test/ranking/ --ignore=reagent/test/training/ --ignore=reagent/test/prediction/ --ignore=reagent/test/world_model/
+    pytest reagent/test -n0 -m "serial" --ignore=reagent/test/ranking/ --ignore=reagent/test/training/ --ignore=reagent/test/prediction/ --ignore=reagent/test/world_model/

-[testenv:circleci_gym_unittest]
+
+[testenv:circleci_gym_replay_buffer_1_cpu_unittest]
+commands =
+    pytest reagent/gym/tests -n2 -m "not serial" -k "test_replay_buffer_gym_cpu_1"
+
+
+[testenv:circleci_gym_replay_buffer_2_cpu_unittest]
+commands =
+    pytest reagent/gym/tests -n2 -m "not serial" -k "test_replay_buffer_gym_cpu_2"
+
+
+# all cpu tests in reagent/gym/tests except test_replay_buffer_gym_cpu_x
+[testenv:circleci_gym_cpu_unittest]
+commands =
+    pytest reagent/gym/tests -n2 -m "not serial" -k "not test_replay_buffer_gym_cpu"
+
+
+[testenv:circleci_gym_replay_buffer_1_gpu_unittest]
 install_command = {[ubuntu_gpu]install_command}
 commands =
-    pytest reagent/gym/tests -n2 -m "(not serial) and (not seq2slate_long)"
-    pytest reagent/gym/tests -n0 -m "serial"
+    pytest reagent/gym/tests -n0 -m "serial" -k "test_replay_buffer_gym_gpu_1"


-[testenv:circleci_seq2slate_unittest]
+[testenv:circleci_gym_replay_buffer_2_gpu_unittest]
 install_command = {[ubuntu_gpu]install_command}
 commands =
-    pytest reagent/test -n0 -m "seq2slate_long"
+    pytest reagent/gym/tests -n0 -m "serial" -k "test_replay_buffer_gym_gpu_2"
+
+
+# all gpu tests in reagent/gym/tests except test_replay_buffer_gym_gpu_x
+[testenv:circleci_gym_gpu_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/gym/tests -n0 -m "serial" -k "not test_replay_buffer_gym_gpu"
+
+
+[testenv:circleci_ranking_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/ranking -n2
+
+
+[testenv:circleci_training_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/training -n2
+
+
+[testenv:circleci_prediction_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/prediction -n2
+
+
+[testenv:circleci_world_model_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/world_model -n2