default: # the default block specifies, per dtype, the pre-processing applied to all features, labels and evaluators
  boolean:
    filling:
      method: max
      parameters: {}
    label_tuning:
      method: no_tuning
      parameters: {}
    resampling:
      method: threshold
      parameters: {c_value: 0, cond: greater, set_value: 1, threshold: 0}
    transform:
      method: no_transform
      parameters: {}
  float:
    filling:
      method: max
      parameters: {}
    label_tuning:
      method: no_tuning
      parameters: {}
    resampling:
      method: mean
      parameters: {}
    transform:
      method: scale_to_z_score
      parameters: {}
  integer:
    filling:
      method: min
      parameters: {}
    label_tuning:
      method: no_tuning
      parameters: {}
    resampling:
      method: mean
      parameters: {}
    transform:
      method: scale_to_z_score
      parameters: {}
  string:
    filling:
      method: custom
      parameters: {custom_value: ''}
    label_tuning:
      method: no_tuning
      parameters: {}
    resampling:
      method: mode
      parameters: {}
    transform:
      method: compute_and_apply_vocabulary
      parameters: {}
evaluator:
  birth_year: {} # I'd like to see how I did across each year
  gender: {} # I'd like to see if the model is biased because of gender
  start_station_name: {} # I'd like to see how I did across each station
features:
  end_station_id: {} # id of the station where the trip ended: INT
  gender: {} # gender of the rider
  start_station_id: {} # id of the station where the trip started: INT
  tripduration: {} # trip time in seconds
  usertype: {} # usertype, one of [Customer, Subscriber]
labels:
  birth_year: # year the person was born
    loss: mse # mean squared error
    metrics: [mae] # mean absolute error
    transform:
      method: no_transform # we override the `default` here, because we don't want the label normalized
      parameters: {}
split:
  categorize_by: start_station_name # so that all starting stations are equally represented in both splits
  index_ratio: {eval: 0.1, train: 0.9} # 0.9 for training and 0.1 for evaluation
  where:
    - birth_year > 1900 # the where clause allows a bit more flexibility: we keep only people born after 1900, which also removes nulls in one swoop
trainer:
  layers:
    - {type: dense, units: 64} # a dense layer with 64 units
    - {type: dense, units: 32} # a dense layer with 32 units
  architecture: feedforward # can be feedforward or sequential
  last_activation: linear # activation of the last layer: relu would work, but linear is fine
  num_output_units: 1 # units in the last layer: we choose 1 because we want to regress a single number (i.e. birth_year)
  optimizer: adam # optimizer for the loss function
  save_checkpoints_steps: 15000 # number of steps between checkpoint evaluations for our TensorBoard logs
  eval_batch_size: 256 # batch size for the evaluation that happens at every checkpoint
  train_batch_size: 256 # batch size for training
  train_steps: 230000 # two epochs
  type: regression # choose from [regression, classification, autoencoder]
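
The `default` block is easier to read with concrete values. Below is a minimal Python sketch of the behavior the comments above describe; the helper names are hypothetical and this is not the pipeline's actual implementation.

```python
# Illustrative sketch of the per-dtype defaults (hypothetical helpers,
# not the pipeline's real code).
import statistics

def fill(values, method):
    """filling: replace missing entries; 'max'/'min' fill with the observed extreme."""
    observed = [v for v in values if v is not None]
    fill_value = max(observed) if method == "max" else min(observed)
    return [fill_value if v is None else v for v in values]

def scale_to_z_score(values):
    """transform (float/integer default): (x - mean) / stddev."""
    mean = statistics.mean(values)
    stddev = statistics.pstdev(values)
    return [(v - mean) / stddev for v in values]

def threshold_resample(values, threshold=0, cond="greater", set_value=1, c_value=0):
    """resampling (boolean default): values greater than `threshold` become
    `set_value`; everything else becomes `c_value`."""
    assert cond == "greater"
    return [set_value if v > threshold else c_value for v in values]

print(fill([1.0, None, 3.0], method="max"))  # [1.0, 3.0, 3.0]
print(scale_to_z_score([1.0, 3.0, 3.0]))     # [-1.41..., 0.70..., 0.70...]
print(threshold_resample([-2, 0, 5]))        # [0, 0, 1]
```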
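
Likewise, the `trainer` block maps onto a small feedforward regressor. Here is a hedged Keras equivalent; the relu hidden activations and the input width are assumptions, since the config does not specify them.

```python
# Keras sketch of the `trainer` block: dense 64 -> dense 32 -> 1 linear unit,
# trained with adam on mse and tracked with mae (per the `labels` block).
# Hidden activations and input shape are assumptions, not taken from the config.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.Input(shape=(5,)),                     # 5 pre-processed features
    tf.keras.layers.Dense(64, activation="relu"),   # {type: dense, units: 64}
    tf.keras.layers.Dense(32, activation="relu"),   # {type: dense, units: 32}
    tf.keras.layers.Dense(1, activation="linear"),  # num_output_units: 1, last_activation: linear
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
```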