Cannot use a custom Differentiable struct as a Layer input
laclouis5 opened this issue · 3 comments
I'm trying to implement a UNet-like segmentation network; here is the complete definition:
```swift
struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}

struct ReLU<Scalar: TensorFlowFloatingPoint>: ParameterlessLayer {
    typealias TangentVector = EmptyTangentVector

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        relu(input)
    }
}

struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    let relu = ReLU<Scalar>()

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu)
    }
}

struct ResidualBlock<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(channels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        conv2 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: channels)
        bn2 = BatchNorm(featureCount: channels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu) + input
    }
}

struct Down<Scalar: TensorFlowFloatingPoint>: Layer {
    var downsample: Conv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        downsample = Conv2D(filterShape: (3, 3, inChannels, outChannels), strides: (2, 2), padding: .same)
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: downsample, conv)
    }
}

struct Up<Scalar: TensorFlowFloatingPoint>: Layer {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: TensorPair<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input.lhs) + input.rhs)
    }
}

struct Head<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv: Conv2D<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        self.conv = Conv2D(filterShape: (1, 1, inChannels, outChannels), padding: .same)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(input)
    }
}

struct UNet<Scalar: TensorFlowFloatingPoint>: Layer {
    var adapter: DoubleConv<Scalar>
    var down1, down2, down3, down4: Down<Scalar>
    var up1, up2, up3, up4: Up<Scalar>
    var head: Head<Scalar>

    init(_ inChannels: Int = 3, _ outChannels: Int = 1) {
        adapter = DoubleConv(inChannels, 64)
        down1 = Down(64, 128)
        down2 = Down(128, 256)
        down3 = Down(256, 512)
        down4 = Down(512, 1024)
        up1 = Up(1024, 512)
        up2 = Up(512, 256)
        up3 = Up(256, 128)
        up4 = Up(128, 64)
        head = Head(64, outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let d0 = adapter(input)
        let d1 = down1(d0)
        let d2 = down2(d1)
        let d3 = down3(d2)
        let d4 = down4(d3)
        let u1 = up1(TensorPair(d4, d3))
        let u2 = up1(TensorPair(u1, d2))
        let u3 = up1(TensorPair(u2, d1))
        let u4 = up1(TensorPair(u3, d0))
        let output = head(u4)
        return output
    }
}
```
The problem is that the `Up` layer must accept two inputs (the input and the shortcut connection), so I created a `Differentiable` struct to handle this case, since the `callAsFunction()` method only accepts one input. However, this leads to an error:
```
error: <Cell 11>:32:22: error: expression is not differentiable
let u4 = up1(TensorPair(u3, d0))
             ^
<Cell 11>:32:22: note: cannot differentiate functions that have not been marked '@differentiable' and that are defined in other files
let u4 = up1(TensorPair(u3, d0))
```

Same error for the three other `Up` layers.
- Any idea on how to solve this?
- Is there a better alternative to write layers that accept more than one input?
- Will the `Layer` protocol support a `callAsFunction()` requirement with multiple inputs in the future (when generics allow that, of course), i.e. a `callAsFunction(_ input1: Input1, _ input2: Input2, ...)` or `callAsFunction(_ inputs: Input...)`? I also think that the `TensorPair` struct should be part of the TensorFlow Swift API, or that tuples should automatically conform to `Differentiable` when possible.
Thanks for reporting!
> - Any idea on how to solve this?

Could you please share the compiler version you're using (`swift --version`)?

Your code snippet compiles fine for me with Swift for TensorFlow 0.12:

```
$ swift --version
Swift version 5.3-dev (LLVM 69d8678431d3eee, Swift e1aef96b7fea59b)
Target: x86_64-apple-darwin19.6.0
```
```swift
import TensorFlow

struct TensorPair<Scalar: TensorFlowFloatingPoint>: AdditiveArithmetic, Differentiable {
    var lhs: Tensor<Scalar>
    var rhs: Tensor<Scalar>

    init(_ lhs: Tensor<Scalar>, _ rhs: Tensor<Scalar>) {
        self.lhs = lhs
        self.rhs = rhs
    }
}

struct ReLU<Scalar: TensorFlowFloatingPoint>: ParameterlessLayer {
    typealias TangentVector = EmptyTangentVector

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        relu(input)
    }
}

struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu)
    }
}

struct ResidualBlock<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>
    var relu = ReLU<Scalar>()

    init(channels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        conv2 = Conv2D(filterShape: (3, 3, channels, channels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: channels)
        bn2 = BatchNorm(featureCount: channels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: conv1, bn1, relu, conv2, bn2, relu) + input
    }
}

struct Down<Scalar: TensorFlowFloatingPoint>: Layer {
    var downsample: Conv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        downsample = Conv2D(filterShape: (3, 3, inChannels, outChannels), strides: (2, 2), padding: .same)
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        input.sequenced(through: downsample, conv)
    }
}

struct Up<Scalar: TensorFlowFloatingPoint>: Layer {
    var upsample: TransposedConv2D<Scalar>
    var conv: ResidualBlock<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        upsample = TransposedConv2D(filterShape: (2, 2, outChannels, inChannels), strides: (2, 2))
        conv = ResidualBlock(channels: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: TensorPair<Scalar>) -> Tensor<Scalar> {
        conv(upsample(input.lhs) + input.rhs)
    }
}

struct Head<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv: Conv2D<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        self.conv = Conv2D(filterShape: (1, 1, inChannels, outChannels), padding: .same)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        conv(input)
    }
}

struct UNet<Scalar: TensorFlowFloatingPoint>: Layer {
    var adapter: DoubleConv<Scalar>
    var down1, down2, down3, down4: Down<Scalar>
    var up1, up2, up3, up4: Up<Scalar>
    var head: Head<Scalar>

    init(_ inChannels: Int = 3, _ outChannels: Int = 1) {
        adapter = DoubleConv(inChannels, 64)
        down1 = Down(64, 128)
        down2 = Down(128, 256)
        down3 = Down(256, 512)
        down4 = Down(512, 1024)
        up1 = Up(1024, 512)
        up2 = Up(512, 256)
        up3 = Up(256, 128)
        up4 = Up(128, 64)
        head = Head(64, outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let d0 = adapter(input)
        let d1 = down1(d0)
        let d2 = down2(d1)
        let d3 = down3(d2)
        let d4 = down4(d3)
        let u1 = up1(TensorPair(d4, d3))
        let u2 = up1(TensorPair(u1, d2))
        let u3 = up1(TensorPair(u2, d1))
        let u4 = up1(TensorPair(u3, d0))
        let output = head(u4)
        return output
    }
}
```
> - Is there a better alternative to write layers that accept more than one input?
> - Will the `Layer` protocol support a `callAsFunction()` requirement with multiple inputs in the future (when generics allow that, of course), i.e. a `callAsFunction(_ input1: Input1, _ input2: Input2, ...)` or `callAsFunction(_ inputs: Input...)`? I also think that the `TensorPair` struct should be part of the TensorFlow Swift API, or that tuples should automatically conform to `Differentiable` when possible.
There isn't a better way to write layers taking/returning more than one value currently, unfortunately. `Layer.callAsFunction` takes a single `Differentiable`-conforming argument and returns a single `Differentiable`-conforming result. To encode multiple arguments or results, a tuple-representing struct like `TensorPair` is the best solution.
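The same pattern also covers multiple results: a layer that needs to return two tensors can use a `TensorPair`-style struct as its `Output`. The sketch below is purely illustrative (the `SplitHead` name, the `Dense` sub-layers, and the sizes are made up for this example, not part of the API):

```swift
// Hypothetical sketch: a layer with two outputs, wrapped in the same kind of
// Differentiable struct used above for multiple inputs.
struct SplitHead<Scalar: TensorFlowFloatingPoint>: Layer {
    var classifier: Dense<Scalar>
    var regressor: Dense<Scalar>

    init(inputSize: Int, classCount: Int, boxCount: Int) {
        classifier = Dense(inputSize: inputSize, outputSize: classCount)
        regressor = Dense(inputSize: inputSize, outputSize: boxCount)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> TensorPair<Scalar> {
        // Both results travel through one Differentiable value, so the Layer
        // protocol's single-input/single-output requirement is satisfied.
        TensorPair(classifier(input), regressor(input))
    }
}
```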
Tuple types cannot yet conform to protocols like `Differentiable`. Some hardcoded support is being added to let tuples conform to `Equatable`, `Comparable`, and `Hashable` (apple/swift#28833), but that is quite involved and not extensible. There's also the nuance that different tuple types with the same arity may have different `TangentVector` types based on the elements' conformances to `Differentiable` (like `(Float, Float)` vs `(Float, Int)`).
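Struct declarations don't have this problem because the compiler synthesizes the `Differentiable` conformance (and its `TangentVector`) from the stored properties, and non-differentiable fields can be excluded with `@noDerivative`. A small hypothetical sketch, not from this thread:

```swift
// Sketch: the synthesized TangentVector only includes `value`; the String is
// skipped via @noDerivative, which has no tuple equivalent today.
struct ScaledTensor<Scalar: TensorFlowFloatingPoint>: Differentiable {
    var value: Tensor<Scalar>
    @noDerivative var label: String
}
```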
By the way, here's an alternative (simpler) `UNet` definition. It directly calls functions like `relu` in `callAsFunction` methods instead of creating a dedicated `ReLU` layer.
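For reference, that style applied to the `DoubleConv` block above would look roughly like the sketch below (my assumption about the approach in the linked definition, not a copy of it):

```swift
// Sketch: same DoubleConv as above, but calling the free function relu(_:)
// directly instead of routing through a dedicated ReLU layer.
struct DoubleConv<Scalar: TensorFlowFloatingPoint>: Layer {
    var conv1, conv2: Conv2D<Scalar>
    var bn1, bn2: BatchNorm<Scalar>

    init(_ inChannels: Int, _ outChannels: Int) {
        conv1 = Conv2D(filterShape: (3, 3, inChannels, outChannels), padding: .same, useBias: false)
        bn1 = BatchNorm(featureCount: outChannels)
        conv2 = Conv2D(filterShape: (3, 3, outChannels, outChannels), padding: .same, useBias: false)
        bn2 = BatchNorm(featureCount: outChannels)
    }

    @differentiable
    func callAsFunction(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        // relu is a plain differentiable function, so no ReLU layer is needed.
        relu(bn2(conv2(relu(bn1(conv1(input))))))
    }
}
```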
Thank you for the quick and detailed answer! I'm using S4TF in a Google Colab, but I was not able to find the Swift version.

I see that the simpler implementation you linked inlines everything in the `UNet` module; this is great, but not flexible enough for my application.

I'll try a different environment, since the code seems to compile correctly on your side.
Please open a new issue if you have any more questions :)