如何学习TensorFlow源码

如题所述

第1个回答  2016-12-03
如果从源码构建TensorFlow会需要执行如下命令:
bazel build -c opt //tensorflow/tools/pip_package:build_pip_package

对应的BUILD文件的rule为:
sh_binary(
name = "build_pip_package",
srcs = ["build_pip_package.sh"],
data = [
"MANIFEST.in",
"README",
"setup.py",
"//tensorflow/core:framework_headers",
":other_headers",
":simple_console",
"//tensorflow:tensorflow_py",
"//tensorflow/examples/tutorials/mnist:package",
"//tensorflow/models/embedding:package",
"//tensorflow/models/image/cifar10:all_files",
"//tensorflow/models/image/mnist:convolutional",
"//tensorflow/models/rnn:package",
"//tensorflow/models/rnn/ptb:package",
"//tensorflow/models/rnn/translate:package",
"//tensorflow/tensorboard",
],
)

sh_binary在这里的主要作用是生成data的这些依赖。一个一个来看,一开始的三个文件MANIFEST.in、README、setup.py是直接存在的,因此不会有什么操作。
“//tensorflow/core:framework_headers”:其对应的rule为:
filegroup(
name = "framework_headers",
srcs = [
"framework/allocator.h",
......
"util/device_name_utils.h",
],
)

这里filegroup的作用是给这一堆头文件一个别名,方便其他rule引用。
“:other_headers”:rule为:
transitive_hdrs(
name = "other_headers",
deps = [
"//third_party/eigen3",
"//tensorflow/core:protos_all_cc",
],
)

transitive_hdrs的定义在:
load("//tensorflow:tensorflow.bzl", "transitive_hdrs")

实现为:
# Bazel rule for collecting the header files that a target depends on.
def _transitive_hdrs_impl(ctx):
outputs = set()
for dep in ctx.attr.deps:
outputs += dep.cc.transitive_headers
return struct(files=outputs)

_transitive_hdrs = rule(attrs={
"deps": attr.label_list(allow_files=True,
providers=["cc"]),
},
implementation=_transitive_hdrs_impl,)

def transitive_hdrs(name, deps=[], **kwargs):
_transitive_hdrs(name=name + "_gather",
deps=deps)
native.filegroup(name=name,
srcs=[":" + name + "_gather"])

其作用依旧是收集依赖需要的头文件。
“:simple_console”:其rule为:
py_binary(
name = "simple_console",
srcs = ["simple_console.py"],
srcs_version = "PY2AND3",
deps = ["//tensorflow:tensorflow_py"],
)
py_library(
name = "tensorflow_py",
srcs = ["__init__.py"],
srcs_version = "PY2AND3",
visibility = ["//visibility:public"],
deps = ["//tensorflow/python"],
)

simple_console.py的代码的主要部分是:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import code
import sys

def main(_):
"""Run an interactive console."""
code.interact()
return 0

if __name__ == '__main__':
sys.exit(main(sys.argv))

可以看到起通过deps = [“//tensorflow/python”]构建了依赖包,然后生成了对应的执行文件。看下依赖的rule规则。//tensorflow/python对应的rule为:
py_library(
name = "python",
srcs = [
"__init__.py",
],
srcs_version = "PY2AND3",
visibility = ["//tensorflow:__pkg__"],
deps = [
":client",
":client_testlib",
":framework",
":framework_test_lib",
":kernel_tests/gradient_checker",
":platform",
":platform_test",
":summary",
":training",
"//tensorflow/contrib:contrib_py",
],
)

这里如果仔细看的话会发现其主要是生成一堆python的模块。从这里貌似可以看出每个python的module都对应了一个rule,且module依赖的module都写在了deps里。特别的,作为一个C++的切入,我们关注下training这个依赖:
py_library(
name = "training",
srcs = glob(
["training/**/*.py"],
exclude = ["**/*test*"],
),
srcs_version = "PY2AND3",
deps = [
":client",
":framework",
":lib",
":ops",
":protos_all_py",
":pywrap_tensorflow",
":training_ops",
],
)

这里其依赖的pywrap_tensorflow的rule为:
tf_py_wrap_cc(
name = "pywrap_tensorflow",
srcs = ["tensorflow.i"],
swig_includes = [
"client/device_lib.i",
"client/events_writer.i",
"client/server_lib.i",
"client/tf_session.i",
"framework/python_op_gen.i",
"lib/core/py_func.i",
"lib/core/status.i",
"lib/core/status_helper.i",
"lib/core/strings.i",
"lib/io/py_record_reader.i",
"lib/io/py_record_writer.i",
"platform/base.i",
"platform/numpy.i",
"util/port.i",
"util/py_checkpoint_reader.i",
],
deps = [
":py_func_lib",
":py_record_reader_lib",
":py_record_writer_lib",
":python_op_gen",
":tf_session_helper",
"//tensorflow/core/distributed_runtime:server_lib",
"//tensorflow/core/distributed_runtime/rpc:grpc_server_lib",
"//tensorflow/core/distributed_runtime/rpc:grpc_session",
"//util/python:python_headers",
],
)

tf_py_wrap_cc为其自己实现的一个rule,这里的.i就是SWIG的interface文件。来看下其实现:
def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
module_name = name.split("/")[-1]
# Convert a rule name such as foo/bar/baz to foo/bar/_baz.so
# and use that as the name for the rule producing the .so file.
cc_library_name = "/".join(name.split("/")[:-1] + ["_" + module_name + ".so"])
extra_deps = []
_py_wrap_cc(name=name + "_py_wrap",
srcs=srcs,
swig_includes=swig_includes,
deps=deps + extra_deps,
module_name=module_name,
py_module_name=name)
native.cc_binary(
name=cc_library_name,
srcs=[module_name + ".cc"],
copts=(copts + ["-Wno-self-assign", "-Wno-write-strings"]
+ tf_extension_copts()),
linkopts=tf_extension_linkopts(),
linkstatic=1,
linkshared=1,
deps=deps + extra_deps)
native.py_library(name=name,
srcs=[":" + name + ".py"],
srcs_version="PY2AND3",
data=[":" + cc_library_name])

按照SWIG的正常流程,先要通过swig命令生成我们的wrap的c文件,然后和依赖生成我们的so文件,最后生成一个同名的python文件用于import。这里native.cc_binary和native.py_library做了我们后面的两件事情,而swig命令的执行则交给了_py_wrap_cc。其实现为:
_py_wrap_cc = rule(attrs={
"srcs": attr.label_list(mandatory=True,
allow_files=True,),
"swig_includes": attr.label_list(cfg=DATA_CFG,
allow_files=True,),
"deps": attr.label_list(allow_files=True,
providers=["cc"],),
"swig_deps": attr.label(default=Label(
"//tensorflow:swig")), # swig_templates
"module_name": attr.string(mandatory=True),
"py_module_name": attr.string(mandatory=True),
"swig_binary": attr.label(default=Label("//tensorflow:swig"),
cfg=HOST_CFG,
executable=True,
allow_files=True,),
},
outputs={
"cc_out": "%{module_name}.cc",
"py_out": "%{py_module_name}.py",
},
implementation=_py_wrap_cc_impl,)

_py_wrap_cc_impl的实现为:
# Bazel rules for building swig files.
def _py_wrap_cc_impl(ctx):
srcs = ctx.files.srcs
if len(srcs) != 1:
fail("Exactly one SWIG source file label must be specified.", "srcs")
module_name = ctx.attr.module_name
cc_out = ctx.outputs.cc_out
py_out = ctx.outputs.py_out
src = ctx.files.srcs[0]
args = ["-c++", "-python"]
args += ["-module", module_name]
args += ["-l" + f.path for f in ctx.files.swig_includes]
cc_include_dirs = set()
cc_includes = set()
for dep in ctx.attr.deps:
cc_include_dirs += [h.dirname for h in dep.cc.transitive_headers]
cc_includes += dep.cc.transitive_headers
args += ["-I" + x for x in cc_include_dirs]
args += ["-I" + ctx.label.workspace_root]
args += ["-o", cc_out.path]
args += ["-outdir", py_out.dirname]
args += [src.path]
outputs = [cc_out, py_out]
ctx.action(executable=ctx.executable.swig_binary,
arguments=args,
mnemonic="PythonSwig",
inputs=sorted(set([src]) + cc_includes + ctx.files.swig_includes +
ctx.attr.swig_deps.files),
outputs=outputs,
progress_message="SWIGing {input}".format(input=src.path))
return struct(files=set(outputs))

这里的ctx.executable.swig_binary是一个shell脚本,内容为:
# If possible, read swig path out of "swig_path" generated by configure
SWIG=swig
SWIG_PATH=tensorflow/tools/swig/swig_path
if [ -e $SWIG_PATH ]; then
SWIG=`cat $SWIG_PATH`
fi

# If this line fails, rerun configure to set the path to swig correctly
"$SWIG" "$@"

可以看到起就是调用了swig命令。
“//tensorflow:tensorflow_py”:其rule为:
py_library(
name = "tensorflow_py",
srcs = ["__init__.py"],
srcs_version = "PY2AND3",
visibility = ["//visibility:public"],
deps = ["//tensorflow/python"],
)
第2个回答  2016-12-03
s = ["build_pip_package.sh"],
data = [
"MANIFEST.in",
"README",
"setup.py",
"//tensorflow/core:framework_headers",
":other_headers",
":simple_cons
相似回答