Database Table Creation Modification

Summary:
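Modified the training table creation in:
fbcode/fbjava/fb-spark-applications/rl/dqn-preprocessing/src/main/scala/com/facebook/spark/rl/MultiStepTimeline.scala

so that it follows a table creation process similar to the one in:
fbcode/fbjava/fb-spark-applications/rl/dqn-preprocessing/src/main/scala/com/facebook/spark/rl/Timeline.scala

Specifically, createTrainingTable now takes the action column's data type string (actionDataType) instead of the boolean actionDiscrete flag, and uses it directly in the table schema.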

Reviewed By: czxttkl

Differential Revision: D19662523

fbshipit-source-id: 9cfd789d4a0d737ca0d0a54019be332104699717
This commit is contained in:
Gary Engler
2020-01-31 12:51:05 -08:00
committed by Facebook Github Bot
parent 0d32fad4ca
commit 7a9d1fa4eb
@@ -128,16 +128,15 @@ object MultiStepTimeline {
Helper.getDataTypes(sqlContext, config.inputTableName, List("action"))("action")
log.info("action column data type:" + s"${actionDataType}")
assert(Set("string", "map<bigint,double>").contains(actionDataType))
val actionDiscrete = actionDataType == "string"
var sortActionMethod = "UDF_SORT_ID";
var sortPossibleActionMethod = "UDF_SORT_ARRAY_ID";
if (!actionDiscrete) {
if (actionDataType != "string") {
sortActionMethod = "UDF_SORT_MAP";
sortPossibleActionMethod = "UDF_SORT_ARRAY_MAP";
}
MultiStepTimeline.createTrainingTable(sqlContext, config.outputTableName, actionDiscrete)
MultiStepTimeline.createTrainingTable(sqlContext, config.outputTableName, actionDataType)
MultiStepTimeline.registerUDFs(sqlContext)
val sqlCommand = s"""
@@ -286,30 +285,23 @@ object MultiStepTimeline {
def createTrainingTable(
sqlContext: SQLContext,
tableName: String,
actionDiscrete: Boolean
actionDataType: String
): Unit = {
var actionType = "STRING";
var possibleActionType = "ARRAY<STRING>";
if (!actionDiscrete) {
actionType = "MAP<BIGINT, DOUBLE>"
possibleActionType = "ARRAY<MAP<BIGINT,DOUBLE>>"
}
val sqlCommand = s"""
CREATE TABLE IF NOT EXISTS ${tableName} (
mdp_id STRING,
state_features MAP <BIGINT, DOUBLE>,
action ${actionType},
action ${actionDataType},
action_probability DOUBLE,
reward ARRAY<DOUBLE>,
next_state_features ARRAY<MAP<BIGINT,DOUBLE>>,
next_action ARRAY<${actionType}>,
next_action ARRAY<${actionDataType}>,
sequence_number BIGINT,
sequence_number_ordinal BIGINT,
time_diff ARRAY<BIGINT>,
time_since_first BIGINT,
possible_actions ${possibleActionType},
possible_next_actions ARRAY<${possibleActionType}>,
possible_actions Array<${actionDataType}>,
possible_next_actions ARRAY<ARRAY<${actionDataType}>>,
metrics ARRAY<MAP<STRING, DOUBLE>>
) PARTITIONED BY (ds STRING) TBLPROPERTIES ('RETENTION'='30')
""".stripMargin