最近发现模型在更新的一瞬间容易出现超时问题,于是了解了一下 tf-serving 的 warmup 机制:模型启动(加载新版本)时,tf-serving 会读取 ${model}/${version}/assets.extra/tf_serving_warmup_requests 文件中记录的请求并预先执行,从而达到热启动的目的,使模型更新时不易产生超时。
首先根据自己模型的输入字段编写代码,生成 tf_serving_warmup_requests 文件(代码中的 server 即 tf-serving 所在机器的地址加端口号),然后在导出模型时把 warmup 文件一并导出。
以下是我的 warmup 文件生成代码:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: supeihuang
# Time: 2019/9/5 9:33
import numpy
import tensorflow as tf
from tensorflow_serving.apis import model_pb2
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_log_pb2
from tensorflow_serving.apis import prediction_service_pb2
from grpc.beta import implementations
import grpc
import tensorflow as tf
from tensorflow_serving.apis import prediction_service_pb2_grpc
import datetime

# Command-line flags: where the PredictionService listens and which model to hit.
tf.app.flags.DEFINE_string('server', '47.93.217.40:31220',
                           'PredictionService host:port')
tf.app.flags.DEFINE_string('model', 'din_pregrant_posts_3',
                           'Model name.')
FLAGS = tf.app.flags.FLAGS

# Column order of one raw sample row (76 columns in total). The position of a
# name in this list is the index of its value inside a parsed sample line.
_CSV_COLUMNS = [
    # 9 columns
    "labels", "userId", "itemId", "userEntrance", "userRequestTime",
    "userRequestWeek", "userOs", "userApn", "userUa",
    # 10 columns
    "userMode", "userProvince", "userCity", "userCityLevel", "userMarr",
    "userAge", "userGestat_week", "userAgeRange", "userBage", "userAppV",
    # 9 columns
    "userCliN_Inc", "userShoN_Inc", "userBotActCt", "userTotalTime",
    "userView2BottomTimes", "userEffTimes", "userFirstRequest", "userAppLTag",
    "userHisL",
    # 10 columns
    "itemAlgSource", "itemTexL", "itemKwN", "itemTitL", "itemTwN", "itemImgN",
    "itemSour", "itemCreT", "itemCliN_Inc", "itemShoN_Inc",
    # 11 columns
    "itemRevi", "itemColN", "itemShare", "itemVreN", "itemLireN", "itemLike",
    "itemEffUsers", "itemView2BottomTimes", "itemTotalTime", "itemBotSum",
    "itemMt",
    # 9 columns
    "itemContentH", "itemCtr", "itemAvsT", "itemFiR", "itemTimeScore",
    "itemBotSumCliR", "itemSexW", "itemSuperstiW", "itemLowTitleW",
    # 8 columns
    "itemTtP", "itemKtW", "itemKtW2", "itemTag1", "itemTag2", "itemTag3",
    "itemKs1", "itemKs2",
    # 10 columns; the original author notes that userItemHistory is optional
    "userItemHistory", "userKeywordHistory", "userKeyword2History",
    "userTag1History", "userTag2History", "userTag3History", "userKs1History",
    "userKs2History", "userKtW", "userTtP",
]
# Sequence-valued history columns. The (misspelled) name is kept unchanged in
# case code outside this listing refers to it.
_COLUMNS_NEED_SPILIT = [
    "userItemHistory", "userKeywordHistory", "userKeyword2History",
    "userTag1History", "userTag2History", "userTag3History",
    "userKs1History", "userKs2History",
]

# Columns whose raw value is a string that main() decodes into a Python list.
_ENCODED_LIST_COLUMNS = (
    'userKeywordHistory', 'userKeyword2History', 'userTag1History',
    'userTag2History', 'userTag3History', 'userKs1History', 'userKs2History',
    "userTtP", "userKtW",
)

# Request inputs sent as int32 tensors of shape [num].
_SCALAR_INT32_FEATURES = (
    'userEntrance', 'userRequestTime', 'userRequestWeek', 'userOs', 'userApn',
    'userUa', 'userMode', 'userProvince', 'userCity', 'userCityLevel',
    'userMarr', 'userAge', 'userGestat_week', 'userAgeRange', 'userBage',
    'userAppV', 'userCliN_Inc', 'userShoN_Inc', 'userBotActCt',
    'userTotalTime', 'userView2BottomTimes', 'userEffTimes',
    'userFirstRequest', 'userAppLTag', 'itemAlgSource', 'itemTexL', 'itemKwN',
    'itemTitL', 'itemTwN', 'itemImgN', 'itemSour', 'itemCreT', 'itemCliN_Inc',
    'itemShoN_Inc', 'itemRevi', 'itemColN', 'itemShare', 'itemVreN',
    'itemLireN', 'itemLike', 'itemEffUsers', 'itemView2BottomTimes',
    'itemTotalTime', 'itemBotSum', 'itemMt',
    'itemKtW', 'itemKtW2', 'itemTag1', 'itemTag2', 'itemTag3', 'itemKs1',
    'itemKs2',
    'userHisL',
)

# Request inputs sent as double tensors of shape [num].
_SCALAR_DOUBLE_FEATURES = (
    'itemContentH', 'itemCtr', 'itemAvsT', 'itemFiR', 'itemTimeScore',
    'itemBotSumCliR', 'itemSexW', 'itemSuperstiW', 'itemLowTitleW',
)

# Request inputs sent as int32 tensors of shape [num, userHisL].
# userTtP / userKtW are decoded from the sample file but deliberately not
# sent: the original listing has them commented out of the request.
_HISTORY_FEATURES = (
    'userKeywordHistory', 'userKeyword2History', 'userTag1History',
    'userTag2History', 'userTag3History', 'userKs1History', 'userKs2History',
)


def _load_sample_rows(path, limit):
    """Read up to ``limit`` sample rows from ``path``.

    Every non-blank line is evaluated as a Python expression yielding an
    indexable sequence with at least ``len(_CSV_COLUMNS)`` values.

    Args:
        path: Path of the UTF-8 text file holding one sample per line.
        limit: Maximum number of rows to read.

    Returns:
        A list of dicts mapping each column name to a one-element list
        ``[value]``. Columns in ``_ENCODED_LIST_COLUMNS`` hold ``[[...]]``
        (one decoded list) when non-empty, or the raw empty string otherwise.
    """
    rows = []
    with open(path, 'r', encoding='UTF-8') as sample_file:
        for text_line in sample_file:
            if len(rows) >= limit:
                break
            if not text_line.strip():
                # A stray blank line would otherwise crash the parser.
                continue
            # NOTE(review): eval() executes arbitrary code from the sample
            # file. Only run this on files you produced yourself; consider
            # ast.literal_eval once the exact file format is confirmed.
            values = eval(text_line)
            row = {column: [values[idx]]
                   for idx, column in enumerate(_CSV_COLUMNS)}
            print("b:", row)
            for column in _ENCODED_LIST_COLUMNS:
                print("item:", column)
                raw = row[column]
                print("tmp:", raw)
                # Compare by value: identity comparison against a string
                # literal ("is not ''") is unreliable.
                if raw[0] != '':
                    row[column] = [list(eval(raw[0]))]
                else:
                    # NOTE(review): an empty history is stored as the raw
                    # empty string, as in the original; confirm that such
                    # rows never reach _build_warmup_inputs.
                    row[column] = raw[0]
            rows.append(row)
    return rows


def _build_warmup_inputs(row, num):
    """Build the ``inputs`` map of a PredictRequest from one sample row.

    Args:
        row: One dict produced by ``_load_sample_rows``.
        num: Batch size; every feature value is repeated ``num`` times.

    Returns:
        A dict mapping feature name to a TensorProto.
    """
    history_len = row['userHisL'][0]
    inputs = {}
    for name in _SCALAR_INT32_FEATURES:
        inputs[name] = tf.make_tensor_proto(
            row[name] * num, shape=[num], dtype=tf.int32)
    for name in _SCALAR_DOUBLE_FEATURES:
        inputs[name] = tf.make_tensor_proto(
            row[name] * num, shape=[num], dtype=tf.double)
    for name in _HISTORY_FEATURES:
        inputs[name] = tf.make_tensor_proto(
            row[name] * num, shape=[num, history_len], dtype=tf.int32)
    return inputs


def main():
    """Generate the TF-Serving warmup file and time the served model.

    Steps:
      1. Read up to 200 sample rows from the local file ``209``.
      2. Serialize the first row as a PredictionLog into
         ``tf_serving_warmup_requests`` in the current directory.
      3. Send the same request 1000 times to ``FLAGS.server`` and print the
         timing and the last prediction.

    Raises:
        ValueError: If the sample file contains no rows.
    """
    channel = grpc.insecure_channel(FLAGS.server)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    print(FLAGS.model)

    requests = _load_sample_rows("209", 200)
    if not requests:
        # Fail before creating an empty warmup file.
        raise ValueError('sample file "209" contains no rows')

    num = 1  # batch size of every generated request
    warmup_rows = requests[:1]  # only the first sample is written as warmup
    with tf.io.TFRecordWriter("tf_serving_warmup_requests") as writer:
        for row in warmup_rows:
            request = predict_pb2.PredictRequest(
                # Use the --model flag (default: din_pregrant_posts_3) so the
                # warmup record and the benchmark target the same model.
                model_spec=model_pb2.ModelSpec(
                    name=FLAGS.model, signature_name="serving_default"),
                inputs=_build_warmup_inputs(row, num))
            log = prediction_log_pb2.PredictionLog(
                predict_log=prediction_log_pb2.PredictLog(request=request))
            writer.write(log.SerializeToString())
    print(request)
    print(warmup_rows[-1]['userKeywordHistory'])

    repeat = 1000
    time_start = datetime.datetime.utcnow()
    # Fire every call asynchronously (30 s deadline each), then wait for all
    # of them, so the clock covers completed predictions and not only dispatch.
    futures = [stub.Predict.future(request, 30.) for _ in range(repeat)]
    for future in futures:
        future.result()
    time_end = datetime.datetime.utcnow()
    time_elapsed_sec = (time_end - time_start).total_seconds()
    print('Total elapsed time: {} seconds'.format(time_elapsed_sec))
    print('Time for batch size {} repeated {} times'.format(num, repeat))
    print('Average latency per batch: {} seconds'.format(
        time_elapsed_sec / repeat))

    response = futures[-1]
    print(response)
    prediction = response.result()
    print(prediction)


if __name__ == "__main__":
    main()
导出模型时的代码如下:
def export_model(model, export_dir, checkpoint_path,
                 warmup_requests_path='/data/supeihuang/din_209_data/data_item/tf_serving_warmup_requests'):
    """Export the model as a SavedModel, bundling the warmup requests file.

    Builds one raw-tensor placeholder per serving feature, wraps them in a
    raw serving input receiver, and exports the model with the warmup file
    registered under ``assets_extra`` as ``tf_serving_warmup_requests``.

    Args:
        model: Object exposing ``export_savedmodel`` (presumably a
            ``tf.estimator.Estimator`` -- confirm against the caller).
        export_dir: Directory to export the SavedModel into.
        checkpoint_path: Checkpoint to export, passed through to
            ``export_savedmodel``.
        warmup_requests_path: Path of the warmup requests file to bundle.
            Defaults to the location the original code hard-coded.

    Returns:
        Whatever ``model.export_savedmodel`` returns.
    """
    # 54 scalar features whose placeholder name equals the feature key.
    feature_names = [
        'userEntrance', 'userRequestTime', 'userRequestWeek', 'userOs',
        'userApn', 'userUa', 'userMode', 'userProvince', 'userCity',
        'userCityLevel', 'userMarr', 'userAge', 'userGestat_week',
        'userAgeRange', 'userBage', 'userAppV', 'userCliN_Inc',
        'userShoN_Inc', 'userBotActCt', 'userTotalTime',
        'userView2BottomTimes', 'userEffTimes', 'userFirstRequest',
        'userAppLTag', 'itemAlgSource', 'itemTexL', 'itemKwN', 'itemTitL',
        'itemTwN', 'itemImgN', 'itemSour', 'itemCreT', 'itemCliN_Inc',
        'itemShoN_Inc', 'itemRevi', 'itemColN', 'itemShare', 'itemVreN',
        'itemLireN', 'itemLike', 'itemEffUsers', 'itemView2BottomTimes',
        'itemTotalTime', 'itemBotSum', 'itemMt', 'itemContentH', 'itemCtr',
        'itemAvsT', 'itemFiR', 'itemTimeScore', 'itemBotSumCliR', 'itemSexW',
        'itemSuperstiW', 'itemLowTitleW',
    ]
    # Features fed as double; every other scalar feature is int32.
    # Built once here instead of on every loop iteration.
    double_features = frozenset([
        "itemContentH", "itemCtr", "itemAvsT", "itemFiR", "itemTimeScore",
        "itemBotSumCliR", "itemSexW", "itemSuperstiW", "itemLowTitleW",
    ])

    serving_features = {}
    for name in feature_names:
        dtype = tf.double if name in double_features else tf.int32
        serving_features[name] = tf.placeholder(dtype, [None, ], name=name)

    # 15 features whose placeholder name differs from the feature key.
    # History features are 2-D: [batch, history_length].
    serving_features.update({
        "itemKtW": tf.placeholder(tf.int32, [None, ], name='keyword'),
        "itemKtW2": tf.placeholder(tf.int32, [None, ], name='keyword2'),
        "itemTag1": tf.placeholder(tf.int32, [None, ], name='tag1'),
        "itemTag2": tf.placeholder(tf.int32, [None, ], name='tag2'),
        "itemTag3": tf.placeholder(tf.int32, [None, ], name='tag3'),
        "itemKs1": tf.placeholder(tf.int32, [None, ], name='ks1'),
        "itemKs2": tf.placeholder(tf.int32, [None, ], name='ks2'),
        "userKeywordHistory": tf.placeholder(tf.int32, [None, None], name='hist_keyword'),
        "userKeyword2History": tf.placeholder(tf.int32, [None, None], name='hist_keyword2'),
        "userTag1History": tf.placeholder(tf.int32, [None, None], name='hist_tag1'),
        "userTag2History": tf.placeholder(tf.int32, [None, None], name='hist_tag2'),
        "userTag3History": tf.placeholder(tf.int32, [None, None], name='hist_tag3'),
        "userKs1History": tf.placeholder(tf.int32, [None, None], name='hist_ks1'),
        "userKs2History": tf.placeholder(tf.int32, [None, None], name='hist_ks2'),
        "userHisL": tf.placeholder(tf.int32, [None, ], name='sl'),
    })

    # Build a serving_input_receiver_fn that expects raw feature tensors.
    example_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        serving_features)

    # Export as a TensorFlow SavedModel; assets_extra places the warmup file
    # under assets.extra/tf_serving_warmup_requests in the export.
    return model.export_savedmodel(
        export_dir,
        example_input_fn,
        checkpoint_path=checkpoint_path,
        assets_extra={'tf_serving_warmup_requests': warmup_requests_path})