diff --git a/.circleci/config.yml b/.circleci/config.yml
index 15bddda3..7dd55228 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -83,15 +83,17 @@ commands:
           name: Installing SDKs
           command: |
             mv ~/.bashrc ~/.bashrc.bk
+            sudo apt-get update
+            # -y answers apt's confirmation prompt, which would otherwise
+            # abort the install because the CI shell has no TTY to reply on.
+            sudo apt-get install -y bc unzip zip
             curl -s "https://get.sdkman.io" | bash
             source "$HOME/.sdkman/bin/sdkman-init.sh"
             sdk version
             sdk install java 8.0.272.hs-adpt
             sdk install scala
             sdk install maven
-            sdk install spark 2.4.6
-            sudo apt-get update
-            sudo apt-get install bc
+            sdk install spark 3.1.1
       - run:
           name: Build preprocessing package
           command: |
@@ -140,12 +142,16 @@ commands:
           steps:
             - run:
                 command: |
-                  pyenv global 3.7.0
+                  pyenv install -s -v 3.8.1  # -s: skip if 3.8.1 is already installed
+                  pyenv global 3.8.1
       - run:
           command: |
-            pip install --upgrade pip
-            pip install tox==3.20.1
-            pip install --upgrade wheel setuptools
+            # apt-get has the script-stable CLI; -y avoids a blocking prompt
+            sudo apt-get update
+            sudo apt-get install -y cmake swig
+            pip install --upgrade pip --progress-bar off
+            pip install --upgrade wheel setuptools --progress-bar off
+            pip install tox==3.20.1 --progress-bar off
       - when:
           condition: << parameters.install_gym >>
           steps:
@@ -154,13 +160,13 @@ commands:
                 steps:
                   - run:
                       command: |
-                        pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html
+                        pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html --progress-bar off
             - unless:
                 condition: << parameters.is_ubuntu_gpu >>
                 steps:
                   - run:
                       command: |
-                        sudo pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+                        sudo pip install -e .[gym,test] --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html --progress-bar off
 
   run_unittest:
     description: Run unittests, coverage and save results
@@ -171,14 +177,14 @@ commands:
       - run:
           no_output_timeout: 30m
           command: |
-            tox -vv -e << parameters.tox_env >>
+            tox -v -e << parameters.tox_env >>
             bash <(curl -s https://codecov.io/bash)
-      - run: python setup.py bdist_wheel
+      - run: python setup.py -q bdist_wheel
       - store_artifacts:
           path: dist/reagent-0.1-py3-none-any.whl
           destination: reagent-0.1-py3-none-any.whl
       - store_test_results:
-          path: .tox/py37/log/
+          path: .tox/py38/log/
 
   run_interrogate:
     description: Install and run interrogate
@@ -186,16 +192,16 @@ commands:
       - run:
           name: Install interrogate
           command: |
-            pip install interrogate
+            pip install interrogate --progress-bar off
       - run:
           name: Run interrogate on reagent code base
           command: |
             interrogate -piImvv -f 15 reagent/
 
 jobs:
-  gpu_unittest:
+  misc_unittest:
     machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
     resource_class: gpu.medium
     environment:
       - CUDA_LAUNCH_BLOCKING: 1
@@ -205,11 +211,47 @@ jobs:
           install_gym: false
           is_ubuntu_gpu: true
       - run_unittest:
-          tox_env: circleci_unittest
+          tox_env: circleci_misc_unittest
 
-  gym_unittest:
+  gym_cpu_unittest:
     machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: large
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_cpu_unittest
+
+  gym_replay_buffer_cpu_unittest_1:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: large
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_1_cpu_unittest
+
+  gym_replay_buffer_cpu_unittest_2:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: large
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_2_cpu_unittest
+
+  gym_gpu_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
     resource_class: gpu.medium
     environment:
       - CUDA_LAUNCH_BLOCKING: 1
@@ -219,11 +261,39 @@ jobs:
           install_gym: false
           is_ubuntu_gpu: true
       - run_unittest:
-          tox_env: circleci_gym_unittest
+          tox_env: circleci_gym_gpu_unittest
+
+  gym_replay_buffer_gpu_unittest_1:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_1_gpu_unittest
+
+  gym_replay_buffer_gpu_unittest_2:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: false
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_gym_replay_buffer_2_gpu_unittest
 
   dqn_cartpole_e2e:
     docker:
-      - image: circleci/python:3.7
+      - image: circleci/python:3.8
     resource_class: large
     environment:
       - BASH_ENV: ~/.bashrc
@@ -235,9 +305,9 @@ jobs:
           is_ubuntu_gpu: false
       - end_to_end_test
 
-  seq2slate_e2e:
+  ranking_unittest:
     machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
     resource_class: gpu.medium
     environment:
       - CUDA_LAUNCH_BLOCKING: 1
@@ -247,11 +317,53 @@ jobs:
           install_gym: true
           is_ubuntu_gpu: true
       - run_unittest:
-          tox_env: circleci_seq2slate_unittest
+          tox_env: circleci_ranking_unittest
+
+  training_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: true
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_training_unittest
+
+  prediction_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: true
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_prediction_unittest
+
+  world_model_unittest:
+    machine:
+      image: ubuntu-1604-cuda-10.2:202012-01
+    resource_class: gpu.medium
+    environment:
+      - CUDA_LAUNCH_BLOCKING: 1
+    steps:
+      - checkout_merge
+      - pip_install:
+          install_gym: true
+          is_ubuntu_gpu: true
+      - run_unittest:
+          tox_env: circleci_world_model_unittest
 
   sac_pendulum_e2e:
     docker:
-      - image: circleci/python:3.7
+      - image: circleci/python:3.8
     resource_class: large
     environment:
       - BASH_ENV: ~/.bashrc
@@ -265,7 +377,7 @@ jobs:
 
   sac_pendulum_e2e_gpu:
     machine:
-      image: ubuntu-1604-cuda-10.1:201909-23
+      image: ubuntu-1604-cuda-10.2:202012-01
     resource_class: gpu.medium
     environment:
       - CONFIG: reagent/workflow/sample_configs/sac_pendulum_offline.yaml
@@ -301,7 +413,7 @@ jobs:
 
   docstring_coverage:
     docker:
-      - image: circleci/python:3.7
+      - image: circleci/python:3.8
     resource_class: small
     steps:
       - checkout_merge
@@ -310,12 +422,20 @@ jobs:
 workflows:
   build:
     jobs:
-      - seq2slate_e2e
+      - ranking_unittest
+      - training_unittest
+      - prediction_unittest
+      - world_model_unittest
       - dqn_cartpole_e2e
       - sac_pendulum_e2e
       - sac_pendulum_e2e_gpu
-      - gpu_unittest
-      - gym_unittest
+      - misc_unittest
+      - gym_cpu_unittest
+      - gym_gpu_unittest
+      - gym_replay_buffer_cpu_unittest_1
+      - gym_replay_buffer_cpu_unittest_2
+      - gym_replay_buffer_gpu_unittest_1
+      - gym_replay_buffer_gpu_unittest_2
       - rasp_test_linux
       - rasp_test_mac
       - docstring_coverage
diff --git a/docs/installation.rst b/docs/installation.rst
index c9e3cf2a..2b663205 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -7,7 +7,7 @@ ReAgent CLI & Python API
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 We have CLI to launch training & Python API to use programmatically, e.g., in your own script or Jupyter Notebook.
-To install this component, you will need to have Python 3.7+ installed on your system.
+To install this component, you will need to have Python 3.8+ installed on your system.
 If you don't have that, you can either install it via `pyenv <https://github.com/pyenv/pyenv>`_ or
 `conda <https://docs.conda.io/projects/conda/en/latest/index.html>`_. To verify that you have the right version,
 type the following command on your shell:
@@ -24,7 +24,7 @@ Once you make sure you have the right version, you can simply clone this repo an
    cd ReAgent
    pip install ".[gym]"
 
-   # install nightly torch (change cpu to cu101/102 if fit)
+   # install nightly torch (replace cpu with cu102 if you have CUDA 10.2)
    pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
 
 If you don't want need gym dependencies, you can remove :code:`[gym]`
@@ -49,7 +49,7 @@ To build from source, you'll need JDK, Scala, & Maven. We will use `SDKMAN! <htt
    curl -s "https://get.sdkman.io" | bash
    source "$HOME/.sdkman/bin/sdkman-init.sh"
    sdk version
-   sdk install java 8.0.265.hs-adpt
+   sdk install java 8.0.272.hs-adpt
    sdk install scala
    sdk install maven
 
@@ -57,7 +57,7 @@ If you are testing locally, you can also install Spark
 
 .. code-block:: bash
 
-   sdk install spark 2.4.6
+   sdk install spark 3.1.1
 
 Now, you can build our preprocessing JAR
 
diff --git a/preprocessing/pom.xml b/preprocessing/pom.xml
index 0e3dc67a..fdb8c497 100644
--- a/preprocessing/pom.xml
+++ b/preprocessing/pom.xml
@@ -37,13 +37,13 @@
       provided
     </parquet.deps.scope>
     <scala.version>
-      2.11.7
+      2.12.10
     </scala.version>
     <scala.binary.version>
-      2.11
+      2.12
     </scala.binary.version>
     <spark.version>
-      2.3.2
+      3.1.1
     </spark.version>
   </properties>
   <dependencies>
@@ -55,7 +55,7 @@
         scalatest_${scala.binary.version}
       </artifactId>
       <version>
-        2.2.6
+        3.2.5
       </version>
       <scope>
         test
@@ -69,7 +69,7 @@
         jacoco-maven-plugin
       </artifactId>
       <version>
-        0.8.5
+        0.8.6
       </version>
       <scope>
         test
@@ -97,7 +97,7 @@
         scalacheck_${scala.binary.version}
       </artifactId>
       <version>
-        1.13.5
+        1.14.1
       </version>
       <scope>
         test
@@ -293,7 +293,7 @@
           scala-maven-plugin
         </artifactId>
         <version>
-          3.2.2
+          4.4.1
         </version>
         <executions>
           <execution>
diff --git a/preprocessing/src/main/scala/com/facebook/spark/rl/Timeline.scala b/preprocessing/src/main/scala/com/facebook/spark/rl/Timeline.scala
index db626085..abae6a57 100644
--- a/preprocessing/src/main/scala/com/facebook/spark/rl/Timeline.scala
+++ b/preprocessing/src/main/scala/com/facebook/spark/rl/Timeline.scala
@@ -128,10 +128,18 @@ object Timeline {
       sqlContext: SQLContext,
       config: TimelineConfiguration
   ): Unit = {
-    var filterTerminal = "HAVING next_state_features IS NOT NULL";
+    var filterTerminal = "WHERE next_state_features IS NOT NULL";
     if (config.addTerminalStateRow) {
       filterTerminal = "";
     }
+    // Optional upper bound on time_since_first. It opens the WHERE clause
+    // itself when filterTerminal is empty, otherwise it is AND-ed onto it.
+    val filterTimeLimit = config.timeWindowLimit
+      .map { timeLimit =>
+        val prefix = if (filterTerminal.isEmpty) "WHERE" else " AND"
+        s"${prefix} time_since_first <= ${timeLimit}"
+      }
+      .getOrElse("")
 
     val actionDataType =
       Helper.getDataTypes(sqlContext, config.inputTableName, List("action"))("action")
@@ -193,23 +201,6 @@ object Timeline {
       case (acc, (k, v)) => s"${acc}, a.${k}"
     }
 
-    val timeLimitedSourceTable = config.timeWindowLimit
-      .map { timeLimit =>
-        s"""
-        , time_limited_source_table AS (
-            SELECT
-                *,
-                sequence_number - FIRST(sequence_number) OVER (
-                     PARTITION BY mdp_id
-                     ORDER BY mdp_id, sequence_number
-                ) AS time_since_first
-            FROM source_table
-            HAVING time_since_first <= ${timeLimit}
-        )
-        """.stripMargin
-      }
-      .getOrElse("")
-
     val sourceTable = s"""
     WITH ${mdpFilter}
         source_table AS (
@@ -225,15 +216,8 @@ object Timeline {
             ${joinClause}
             a.ds BETWEEN '${config.startDs}' AND '${config.endDs}'
         )
-        ${timeLimitedSourceTable}
     """.stripMargin
 
-    val sourceTableName = config.timeWindowLimit
-      .map { _ =>
-        "time_limited_source_table"
-      }
-      .getOrElse("source_table")
-
     val rewardColumnsQuery = rewardColumnDataTypes.foldLeft("") {
       case (acc, (k, v)) => s"${acc}, ${k}"
     }
@@ -253,53 +237,59 @@ object Timeline {
     }
 
     val sqlCommand = s"""
-    ${sourceTable}
+    ${sourceTable},
+    joined_table AS (
+      SELECT
+          mdp_id,
+          state_features,
+          action,
+          LEAD(action) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS next_action,
+          action_probability
+          ${rewardColumnsQuery},
+          LEAD(state_features) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS next_state_features,
+          sequence_number,
+          ROW_NUMBER() OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS sequence_number_ordinal,
+          COALESCE(LEAD(sequence_number) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ), sequence_number) - sequence_number AS time_diff,
+          sequence_number - FIRST(sequence_number) OVER (
+              PARTITION BY
+                  mdp_id
+              ORDER BY
+                  mdp_id,
+                  sequence_number
+          ) AS time_since_first
+          ${timelineJoinColumnsQuery}
+      FROM source_table
+      CLUSTER BY HASH(mdp_id, sequence_number)
+    )
     SELECT
-        mdp_id,
-        state_features,
-        action,
-        LEAD(action) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS next_action,
-        action_probability
-        ${rewardColumnsQuery},
-        LEAD(state_features) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS next_state_features,
-        sequence_number,
-        ROW_NUMBER() OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS sequence_number_ordinal,
-        COALESCE(LEAD(sequence_number) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ), sequence_number) - sequence_number AS time_diff,
-        sequence_number - FIRST(sequence_number) OVER (
-            PARTITION BY
-                mdp_id
-            ORDER BY
-                mdp_id,
-                sequence_number
-        ) AS time_since_first
-        ${timelineJoinColumnsQuery}
-    FROM ${sourceTableName}
+      *
+    FROM joined_table
     ${filterTerminal}
-    CLUSTER BY HASH(mdp_id, sequence_number)
+    ${filterTimeLimit}
     """.stripMargin
     log.info("Executing query: ")
     log.info(sqlCommand)
diff --git a/preprocessing/src/test/scala/com/facebook/spark/common/testutil/PipelineTester.scala b/preprocessing/src/test/scala/com/facebook/spark/common/testutil/PipelineTester.scala
index 365791b5..17c1bb52 100644
--- a/preprocessing/src/test/scala/com/facebook/spark/common/testutil/PipelineTester.scala
+++ b/preprocessing/src/test/scala/com/facebook/spark/common/testutil/PipelineTester.scala
@@ -12,12 +12,13 @@ import org.apache.spark.sql.functions.col
 import org.apache.spark.sql._
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql.types._
-import org.scalatest.{BeforeAndAfterAll, FunSuiteLike, Suite}
+import org.scalatest.{BeforeAndAfterAll, Suite}
+import org.scalatest.funsuite.AnyFunSuiteLike
 
 import scala.collection.mutable
 import scala.math.abs
 
-trait PipelineTester extends FunSuiteLike with BeforeAndAfterAll with TestLogging { this: Suite =>
+trait PipelineTester extends AnyFunSuiteLike with BeforeAndAfterAll with TestLogging { this: Suite =>
 
   @transient private var _sparkContext: SparkContext = _
   def sparkContext: SparkContext = _sparkContext
diff --git a/preprocessing/src/test/scala/com/facebook/spark/common/testutil/TestLogging.scala b/preprocessing/src/test/scala/com/facebook/spark/common/testutil/TestLogging.scala
index b39ec1f5..5d146be5 100644
--- a/preprocessing/src/test/scala/com/facebook/spark/common/testutil/TestLogging.scala
+++ b/preprocessing/src/test/scala/com/facebook/spark/common/testutil/TestLogging.scala
@@ -10,8 +10,8 @@ import org.scalatest._
 import scala.collection.JavaConversions._
 import scala.util.Try
 
-trait TestLogging extends BeforeAndAfterAll with BeforeAndAfterEach with TestLogger {
-  this: Suite =>
+trait TestLogging extends BeforeAndAfterAll with BeforeAndAfterEach with TestLogger with TestSuiteMixin {
+  this: TestSuite =>
 
   private val logLayout = new EnhancedPatternLayout("%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n")
 
@@ -72,6 +72,7 @@ trait TestLogging extends BeforeAndAfterAll with BeforeAndAfterEach with TestLog
       val scopes = test.scopes
       val text = test.text
       val tags = test.tags
+      val pos = test.pos
     }
 
     super.withFixture(wrappedTest)
diff --git a/reagent/gym/tests/configs/cartpole/discrete_dqn_cartpole_online.yaml b/reagent/gym/tests/configs/cartpole/discrete_dqn_cartpole_online.yaml
index d0580af6..67f84f69 100644
--- a/reagent/gym/tests/configs/cartpole/discrete_dqn_cartpole_online.yaml
+++ b/reagent/gym/tests/configs/cartpole/discrete_dqn_cartpole_online.yaml
@@ -27,9 +27,9 @@ model:
         - leaky_relu
     eval_parameters:
       calc_cpe_in_training: false
-replay_memory_size: 20000
+replay_memory_size: 100000
 train_every_ts: 1
-train_after_ts: 5000
+train_after_ts: 20000
 num_train_episodes: 30
 num_eval_episodes: 20
 passing_score_bar: 100.0
diff --git a/reagent/gym/tests/test_gym.py b/reagent/gym/tests/test_gym.py
index 3eb15b54..f94a8244 100644
--- a/reagent/gym/tests/test_gym.py
+++ b/reagent/gym/tests/test_gym.py
@@ -4,6 +4,7 @@ import logging
 import os
 import pprint
 import unittest
+import uuid
 from typing import Optional, Dict, Any
 
 import numpy as np
@@ -48,7 +49,7 @@ Format path to be: "configs/<env_name>/<model_name>_<env_name>_online.yaml."
 NOTE: These tests should ideally finish quickly (within 10 minutes) since they are
 unit tests which are run many times.
 """
-REPLAY_BUFFER_GYM_TESTS = [
+REPLAY_BUFFER_GYM_TESTS_1 = [
     ("Discrete CRR Cartpole", "configs/cartpole/discrete_crr_cartpole_online.yaml"),
     ("Discrete DQN Cartpole", "configs/cartpole/discrete_dqn_cartpole_online.yaml"),
     ("Discrete C51 Cartpole", "configs/cartpole/discrete_c51_cartpole_online.yaml"),
@@ -58,6 +59,8 @@ REPLAY_BUFFER_GYM_TESTS = [
         "configs/open_gridworld/discrete_dqn_open_gridworld.yaml",
     ),
     ("SAC Pendulum", "configs/pendulum/sac_pendulum_online.yaml"),
+]
+REPLAY_BUFFER_GYM_TESTS_2 = [
     ("Continuous CRR Pendulum", "configs/pendulum/continuous_crr_pendulum_online.yaml"),
     ("TD3 Pendulum", "configs/pendulum/td3_pendulum_online.yaml"),
     ("Parametric DQN Cartpole", "configs/cartpole/parametric_dqn_cartpole_online.yaml"),
@@ -91,8 +94,16 @@ curr_dir = os.path.dirname(__file__)
 
 class TestGym(HorizonTestBase):
     # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
-    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS)
-    def test_replay_buffer_gym_cpu(self, name: str, config_path: str):
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_1)
+    def test_replay_buffer_gym_cpu_1(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_cpu(name, config_path)
+
+    # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_2)
+    def test_replay_buffer_gym_cpu_2(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_cpu(name, config_path)
+
+    def _test_replay_buffer_gym_cpu(self, name: str, config_path: str):
         logger.info(f"Starting {name} on CPU")
         self.run_from_config(
             run_test=run_test_replay_buffer,
@@ -102,10 +113,20 @@ class TestGym(HorizonTestBase):
         logger.info(f"{name} passes!")
 
     # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
-    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS)
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_1)
     @pytest.mark.serial
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
-    def test_replay_buffer_gym_gpu(self, name: str, config_path: str):
+    def test_replay_buffer_gym_gpu_1(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_gpu(name, config_path)
+
+    # pyre-fixme[16]: Module `parameterized` has no attribute `expand`.
+    @parameterized.expand(REPLAY_BUFFER_GYM_TESTS_2)
+    @pytest.mark.serial
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_replay_buffer_gym_gpu_2(self, name: str, config_path: str):
+        self._test_replay_buffer_gym_gpu(name, config_path)
+
+    def _test_replay_buffer_gym_gpu(self, name: str, config_path: str):
         logger.info(f"Starting {name} on GPU")
         self.run_from_config(
             run_test=run_test_replay_buffer,
@@ -263,7 +284,12 @@ def run_test_replay_buffer(
         device=device,
     )
     data_loader = torch.utils.data.DataLoader(dataset, collate_fn=identity_collate)
-    pl_trainer = pl.Trainer(max_epochs=1, gpus=int(use_gpu))
+    pl_trainer = pl.Trainer(
+        max_epochs=1,
+        gpus=int(use_gpu),
+        deterministic=True,
+        default_root_dir=f"lightning_log_{uuid.uuid4()}",  # fresh dir per Trainer
+    )
     # Note: the fit() function below also evaluates the agent along the way
     # and adds the new transitions to the replay buffer, so it is training
     # on incrementally larger and larger buffers.
@@ -311,7 +337,12 @@ def run_test_online_episode(
     agent = Agent.create_for_env(env, policy, device=device)
 
     if isinstance(trainer, pl.LightningModule):
-        pl_trainer = pl.Trainer(max_epochs=1, gpus=int(use_gpu), deterministic=True)
+        pl_trainer = pl.Trainer(
+            max_epochs=1,
+            gpus=int(use_gpu),
+            deterministic=True,
+            default_root_dir=f"lightning_log_{uuid.uuid4()}",  # fresh dir per Trainer
+        )
         dataset = EpisodicDataset(
             env=env, agent=agent, num_episodes=num_train_episodes, seed=SEED
         )
diff --git a/reagent/replay_memory/utils.py b/reagent/replay_memory/utils.py
index ed24eb66..237db1f5 100644
--- a/reagent/replay_memory/utils.py
+++ b/reagent/replay_memory/utils.py
@@ -64,7 +64,7 @@ def replay_buffer_to_pre_timeline_df(
         "ds": [DEFAULT_DS for _ in range(n)],
         "state_features": _dense_to_sparse(batch.state),
         "action": action,
-        "mdp_id": batch.mdp_id.tolist(),
+        "mdp_id": list(map(str, batch.mdp_id.flatten().tolist())),
         "sequence_number": sequence_number,
         "action_probability": action_probability,
         "reward": reward,
diff --git a/reagent/test/training/test_synthetic_reward_training.py b/reagent/test/training/test_synthetic_reward_training.py
index 964003b9..fe5fdc77 100644
--- a/reagent/test/training/test_synthetic_reward_training.py
+++ b/reagent/test/training/test_synthetic_reward_training.py
@@ -121,7 +121,8 @@ def train_and_eval(trainer, data, num_eval_batches=100, max_epochs=1):
     train_dataloader = DataLoader(data[:-num_eval_batches], collate_fn=lambda x: x[0])
     eval_data = data[-num_eval_batches:]
 
-    pl_trainer = pl.Trainer(max_epochs=max_epochs)
+    # disable logging in tests
+    pl_trainer = pl.Trainer(max_epochs=max_epochs, logger=False)
     pl_trainer.fit(trainer, train_dataloader)
 
     total_loss = 0
diff --git a/reagent/workflow/training.py b/reagent/workflow/training.py
index a871a061..02b5470f 100644
--- a/reagent/workflow/training.py
+++ b/reagent/workflow/training.py
@@ -262,8 +262,7 @@ def train_workflow(
 
     output_paths = {}
     for module_name, serving_module in model_manager.build_serving_modules().items():
-        # TODO: make this a parameter
-        torchscript_output_path = f"model_{round(time.time())}.torchscript"
+        torchscript_output_path = f"{model_manager.__class__.__name__}_{module_name}_{round(time.time())}.torchscript"
         torch.jit.save(serving_module, torchscript_output_path)
         logger.info(f"Saved {module_name} to {torchscript_output_path}")
         output_paths[module_name] = torchscript_output_path
diff --git a/serving/requirements.txt b/serving/requirements.txt
index 5d8d8dd7..aee8532a 100644
--- a/serving/requirements.txt
+++ b/serving/requirements.txt
@@ -1 +1 @@
-python>=3.7
+python>=3.8
diff --git a/setup.cfg b/setup.cfg
index b686ae34..f75112b6 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,7 +10,7 @@ license = BSD 3-Clause License
 
 [options]
 packages = find:
-python_requires = >=3.7
+python_requires = >=3.8
 install_requires =
   click>=7.0
   # ~=1.2.0 for compatibility with gym
@@ -25,7 +25,7 @@ install_requires =
   tqdm>=4.46.0
   petastorm>=0.9.0
   parameterized>=0.7.4
-  pyspark==2.4.6
+  pyspark==3.1.1
   pytorch-lightning==1.1.5
   ruamel.yaml>=0.15.99
   scipy>=1.3.1
diff --git a/tox.ini b/tox.ini
index bbf75818..baca7ce7 100644
--- a/tox.ini
+++ b/tox.ini
@@ -3,13 +3,16 @@
 # test suite on all supported python versions. To use it, "pip install tox"
 # and then run "tox" from this directory.
 
-[tox]
-envlist = py37
+# This post discusses how to specify patterns for testing specific tests
+# https://stackoverflow.com/questions/36456920/is-there-a-way-to-specify-which-pytest-tests-to-run-from-a-file
 
-# install CUDA 10.1 Torch
+[tox]
+envlist = py38
+
+# install CUDA 10.2 Torch
 [ubuntu_gpu]
 install_command =
-    pip install --pre -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html {opts} {packages}
+    pip install --pre -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html {opts} {packages} --progress-bar off
 
 [pytest]
 addopts = --verbose -d --tx popen --cov=reagent --cov-report=xml --cov-append --junitxml={envlogdir}/junit-{envname}.xml
@@ -25,25 +28,72 @@ extras =
     gym
     test
 install_command =
-    pip install --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html {opts} {packages}
+    pip install --pre -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html {opts} {packages} --progress-bar off
 commands =
-    pytest -n 4 -m "(not serial) and (not seq2slate_long)"
+    pytest -n2 -m "(not serial) and (not seq2slate_long)"
     pytest -n0 -m "serial"
 
-[testenv:circleci_unittest]
+[testenv:circleci_misc_unittest]
 install_command = {[ubuntu_gpu]install_command}
 commands =
-    pytest reagent/test -n auto -m "(not serial) and (not seq2slate_long)"
-    pytest reagent/test -n0 -m "serial"
+    pytest reagent/test -n2 -m "not serial" --ignore=reagent/test/ranking/ --ignore=reagent/test/training/ --ignore=reagent/test/prediction/ --ignore=reagent/test/world_model/
+    pytest reagent/test -n0 -m "serial" --ignore=reagent/test/ranking/ --ignore=reagent/test/training/ --ignore=reagent/test/prediction/ --ignore=reagent/test/world_model/
 
-[testenv:circleci_gym_unittest]
+
+[testenv:circleci_gym_replay_buffer_1_cpu_unittest]
+commands =
+    pytest reagent/gym/tests -n2 -m "not serial" -k "test_replay_buffer_gym_cpu_1"
+
+
+[testenv:circleci_gym_replay_buffer_2_cpu_unittest]
+commands =
+    pytest reagent/gym/tests -n2 -m "not serial" -k "test_replay_buffer_gym_cpu_2"
+
+
+# all cpu tests in reagent/gym/tests except test_replay_buffer_gym_cpu_x
+[testenv:circleci_gym_cpu_unittest]
+commands =
+    pytest reagent/gym/tests -n2 -m "not serial" -k "not test_replay_buffer_gym_cpu"
+
+
+[testenv:circleci_gym_replay_buffer_1_gpu_unittest]
 install_command = {[ubuntu_gpu]install_command}
 commands =
-    pytest reagent/gym/tests -n2 -m "(not serial) and (not seq2slate_long)"
-    pytest reagent/gym/tests -n0 -m "serial"
+    pytest reagent/gym/tests -n0 -m "serial" -k "test_replay_buffer_gym_gpu_1"
 
 
-[testenv:circleci_seq2slate_unittest]
+[testenv:circleci_gym_replay_buffer_2_gpu_unittest]
 install_command = {[ubuntu_gpu]install_command}
 commands =
-    pytest reagent/test -n0 -m "seq2slate_long"
+    pytest reagent/gym/tests -n0 -m "serial" -k "test_replay_buffer_gym_gpu_2"
+
+
+# all gpu tests in reagent/gym/tests except test_replay_buffer_gym_gpu_x
+[testenv:circleci_gym_gpu_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/gym/tests -n0 -m "serial" -k "not test_replay_buffer_gym_gpu"
+
+
+[testenv:circleci_ranking_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/ranking -n2
+
+
+[testenv:circleci_training_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/training -n2
+
+
+[testenv:circleci_prediction_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/prediction -n2
+
+
+[testenv:circleci_world_model_unittest]
+install_command = {[ubuntu_gpu]install_command}
+commands =
+    pytest reagent/test/world_model -n2