我正在做一个数据流量项目,我的工作是在 Google Cloud Platform 上使用 TensorFlow 和 Kubeflow 构建流水线(pipeline)。我一直在尝试在 Transform 组件中更改 ExampleGen 输出的维度,并在 Trainer 中将 Transform 的输出用作模型的输入。我需要在模型中以张量(tensor)的形式使用这些数据,但 Transform 的输出却被保存为类对象。下面是我的 “preprocessing” 和 “features” 两段代码,它们都是 Transform 组件的一部分。

from __future__ import division
from __future__ import print_function

import tensorflow as tf
import tensorflow_transform as tft

from models import features
import numpy as np
import pandas as pd

def _fill_in_missing(x):
    """Replace missing values in a SparseTensor.

    Fills in missing values of `x` with '' or 0, and converts to a dense tensor.
    Inputs that are not a `SparseTensor` are passed through to the final
    squeeze unchanged.

    Args:
        x: A `SparseTensor` of rank 2.  Its dense shape should have size at
            most 1 in the second dimension.

    Returns:
        A rank 1 tensor where missing values of `x` have been filled in.
    """
    if isinstance(x, tf.sparse.SparseTensor):
        # String features are padded with '', every other dtype with 0.
        default_value = '' if x.dtype == tf.string else 0
        # Rebuild the sparse tensor with a fixed second dimension of 1 so the
        # dense result always has shape [batch, 1].
        dense_tensor = tf.sparse.to_dense(
            tf.SparseTensor(x.indices, x.values, [x.dense_shape[0], 1]),
            default_value)
    else:
        # Already dense: nothing to fill in.
        dense_tensor = x

    # Drop the second axis to obtain a rank 1 tensor.
    return tf.squeeze(dense_tensor, axis=1)

def preprocessing_fn(inputs):
    """tf.transform's callback function for preprocessing inputs.

    Args:
        inputs: map from feature keys to raw not-yet-transformed features.

    Returns:
        Map from string feature key to transformed feature operations.
    """
    outputs = {}
    for key in features.DENSE_FLOAT_FEATURE_KEYS:
        # Fill in missing values via _fill_in_missing (which also squeezes the
        # feature to rank 1), then standardize it to a z-score.
        outputs[features.transformed_name(key)] = tft.scale_to_z_score(
            _fill_in_missing(inputs[key]))

    # Do not apply label transformation as it will result in wrong evaluation.
    # The label is passed through untouched under its transformed name.
    outputs[features.transformed_name(
        features.LABEL_KEY)] = inputs[features.LABEL_KEY]

    return outputs

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from typing import Text, List, Any

import os
import tempfile
import numpy as np

# At least one feature is needed.

# Name of features which have continuous float values. These features will be
# used as their own values.

# Keys

def transformed_name(key: Text) -> Text:
    """Return the transformed-feature name for `key`: `key` plus an '_x' suffix."""
    suffix = '_x'
    return key + suffix

def transformed_names(keys: List[Text]) -> List[Text]:
    """Map every name in `keys` through `transformed_name`, preserving order."""
    return list(map(transformed_name, keys))

标签: python, tensorflow, google-cloud-platform, kubeflow, kubeflow-pipelines

