Merge branch 'master' into dg/params_docs

commit cbb9a2a929
@@ -0,0 +1,24 @@
+name: CompatHelper
+
+on:
+  schedule:
+    - cron: '00 00 * * *'
+
+jobs:
+  CompatHelper:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        julia-version: [1.3]
+        julia-arch: [x64]
+        os: [ubuntu-latest]
+    steps:
+      - uses: julia-actions/setup-julia@latest
+        with:
+          version: ${{ matrix.julia-version }}
+      - name: Pkg.add("CompatHelper")
+        run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
+      - name: CompatHelper.main()
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: julia -e 'using CompatHelper; CompatHelper.main()'
@@ -0,0 +1,11 @@
+name: TagBot
+on:
+  schedule:
+    - cron: 0 * * * *
+jobs:
+  TagBot:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: JuliaRegistries/TagBot@v1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
Manifest.toml

@@ -8,15 +8,15 @@ version = "0.5.0"
 [[AbstractTrees]]
 deps = ["Markdown"]
-git-tree-sha1 = "8201f932428d25a2e2903300764515754847d87d"
+git-tree-sha1 = "86d092c2599f1f7bb01668bf8eb3412f98d61e47"
 uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
-version = "0.3.0"
+version = "0.3.2"

 [[Adapt]]
 deps = ["LinearAlgebra"]
-git-tree-sha1 = "82dab828020b872fa9efd3abec1152b075bc7cbf"
+git-tree-sha1 = "c88cfc7f9c1f9f8633cddf0b56e86302b70f64c5"
 uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
-version = "1.0.0"
+version = "1.0.1"

 [[Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

@@ -34,21 +34,21 @@ version = "0.2.0"
 [[CUDAapi]]
 deps = ["Libdl", "Logging"]
-git-tree-sha1 = "56a813440ac98a1aa64672ab460a1512552211a7"
+git-tree-sha1 = "d7ceadd8f821177d05b897c0517e94633db535fe"
 uuid = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"
-version = "2.1.0"
+version = "3.1.0"

 [[CUDAdrv]]
 deps = ["CEnum", "CUDAapi", "Printf"]
-git-tree-sha1 = "1fce616fa0806c67c133eb1d2f68f0f1a7504665"
+git-tree-sha1 = "01e90fa34e25776bc7c8661183d4519149ebfe59"
 uuid = "c5f51814-7f29-56b8-a69c-e4d8f6be1fde"
-version = "5.0.1"
+version = "6.0.0"

 [[CUDAnative]]
 deps = ["Adapt", "CEnum", "CUDAapi", "CUDAdrv", "DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Printf", "TimerOutputs"]
-git-tree-sha1 = "6e11d5c2c91fc623952e94c4fb73f9c4db74795a"
+git-tree-sha1 = "f86269ff60ebe082a2806ecbce51f3cadc68afe9"
 uuid = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17"
-version = "2.7.0"
+version = "2.10.2"

 [[CodecZlib]]
 deps = ["BinaryProvider", "Libdl", "TranscodingStreams"]

@@ -58,15 +58,15 @@ version = "0.6.0"
 [[ColorTypes]]
 deps = ["FixedPointNumbers", "Random"]
-git-tree-sha1 = "7b62b728a5f3dd6ee3b23910303ccf27e82fad5e"
+git-tree-sha1 = "b9de8dc6106e09c79f3f776c27c62360d30e5eb8"
 uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
-version = "0.8.1"
+version = "0.9.1"

 [[Colors]]
 deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Printf", "Reexport"]
-git-tree-sha1 = "c9c1845d6bf22e34738bee65c357a69f416ed5d1"
+git-tree-sha1 = "177d8b959d3c103a6d57574c38ee79c81059c31b"
 uuid = "5ae59095-9a9b-59fe-a467-6f913c188581"
-version = "0.9.6"
+version = "0.11.2"

 [[CommonSubexpressions]]
 deps = ["Test"]

@@ -74,11 +74,17 @@ git-tree-sha1 = "efdaf19ab11c7889334ca247ff4c9f7c322817b0"
 uuid = "bbf7d656-a473-5ed7-a52c-81e309532950"
 version = "0.2.0"

+[[CompilerSupportLibraries_jll]]
+deps = ["Libdl", "Pkg"]
+git-tree-sha1 = "b57c5d019367c90f234a7bc7e24ff0a84971da5d"
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "0.2.0+1"
+
 [[CuArrays]]
 deps = ["AbstractFFTs", "Adapt", "CEnum", "CUDAapi", "CUDAdrv", "CUDAnative", "DataStructures", "GPUArrays", "Libdl", "LinearAlgebra", "MacroTools", "NNlib", "Printf", "Random", "Requires", "SparseArrays", "TimerOutputs"]
-git-tree-sha1 = "51fbe053dea29ed2513e02d38380007310cf4c4b"
+git-tree-sha1 = "7c20c5a45bb245cf248f454d26966ea70255b271"
 uuid = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
-version = "1.6.0"
+version = "1.7.2"

 [[DataAPI]]
 git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"

@@ -87,9 +93,9 @@ version = "1.1.0"
 [[DataStructures]]
 deps = ["InteractiveUtils", "OrderedCollections"]
-git-tree-sha1 = "f784254f428fb8fd7ac15982e5862a38a44523d3"
+git-tree-sha1 = "5a431d46abf2ef2a4d5d00bd0ae61f651cf854c8"
 uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-version = "0.17.7"
+version = "0.17.10"

 [[Dates]]
 deps = ["Printf"]

@@ -107,9 +113,9 @@ version = "1.0.2"
 [[DiffRules]]
 deps = ["NaNMath", "Random", "SpecialFunctions"]
-git-tree-sha1 = "10dca52cf6d4a62d82528262921daf63b99704a2"
+git-tree-sha1 = "eb0c34204c8410888844ada5359ac8b96292cfd1"
 uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
-version = "1.0.0"
+version = "1.0.1"

 [[Distributed]]
 deps = ["Random", "Serialization", "Sockets"]

@@ -123,26 +129,26 @@ version = "1.2.0"
 [[FFTW_jll]]
 deps = ["Libdl", "Pkg"]
-git-tree-sha1 = "05674f209a6e3387dd103a945b0113eeb64b1a58"
+git-tree-sha1 = "ddb57f4cf125243b4aa4908c94d73a805f3cbf2c"
 uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a"
-version = "3.3.9+3"
+version = "3.3.9+4"

 [[FillArrays]]
 deps = ["LinearAlgebra", "Random", "SparseArrays"]
-git-tree-sha1 = "fec413d4fc547992eb62a5c544cedb6d7853c1f5"
+git-tree-sha1 = "85c6b57e2680fa28d5c8adc798967377646fbf66"
 uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
-version = "0.8.4"
+version = "0.8.5"

 [[FixedPointNumbers]]
-git-tree-sha1 = "d14a6fa5890ea3a7e5dcab6811114f132fec2b4b"
+git-tree-sha1 = "4aaea64dd0c30ad79037084f8ca2b94348e65eaa"
 uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
-version = "0.6.1"
+version = "0.7.1"

 [[ForwardDiff]]
 deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"]
-git-tree-sha1 = "840700059391d36e2498d89c2e82c08f261f2a2a"
+git-tree-sha1 = "88b082d492be6b63f967b6c96b352e25ced1a34c"
 uuid = "f6369f11-7733-5829-9624-2563aa707210"
-version = "0.10.8"
+version = "0.10.9"

 [[GPUArrays]]
 deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"]

@@ -152,9 +158,9 @@ version = "2.0.1"
 [[IRTools]]
 deps = ["InteractiveUtils", "MacroTools", "Test"]
-git-tree-sha1 = "72421971e60917b8cd7737f9577c4f0f87eab306"
+git-tree-sha1 = "1a4355e4b5b50be2311ebb644f34f3306dbd0410"
 uuid = "7869d1d1-7146-5819-86e3-90919afe41df"
-version = "0.3.0"
+version = "0.3.1"

 [[IntelOpenMP_jll]]
 deps = ["Libdl", "Pkg"]

@@ -167,10 +173,10 @@ deps = ["Markdown"]
 uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

 [[Juno]]
-deps = ["Base64", "Logging", "Media", "Profile", "Test"]
-git-tree-sha1 = "30d94657a422d09cb97b6f86f04f750fa9c50df8"
+deps = ["Base64", "Logging", "Media", "Profile"]
+git-tree-sha1 = "4f2249fb58cfb140eeb89428e31791e2f8959d8c"
 uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d"
-version = "0.7.2"
+version = "0.8.0"

 [[LLVM]]
 deps = ["CEnum", "Libdl", "Printf", "Unicode"]

@@ -192,16 +198,16 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"

 [[MKL_jll]]
-deps = ["Libdl", "Pkg"]
-git-tree-sha1 = "61069ae718b8ab1e325bbfb4e5268902e7ea08e3"
+deps = ["IntelOpenMP_jll", "Libdl", "Pkg"]
+git-tree-sha1 = "720629cc8cbd12c146ca01b661fd1a6cf66e2ff4"
 uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
-version = "2019.0.117+0"
+version = "2019.0.117+2"

 [[MacroTools]]
 deps = ["DataStructures", "Markdown", "Random"]
-git-tree-sha1 = "e2fc7a55bb2224e203bbd8b59f72b91323233458"
+git-tree-sha1 = "07ee65e03e28ca88bc9a338a3726ae0c3efaa94b"
 uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
-version = "0.5.3"
+version = "0.5.4"

 [[Markdown]]
 deps = ["Base64"]

@@ -224,9 +230,9 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 [[NNlib]]
 deps = ["BinaryProvider", "Libdl", "LinearAlgebra", "Requires", "Statistics"]
-git-tree-sha1 = "135c0de4794d5e214b06f1fb4787af4a72896e61"
+git-tree-sha1 = "755c0bab3912ff782167e1b4b774b833f8a0e550"
 uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-version = "0.6.2"
+version = "0.6.4"

 [[NaNMath]]
 git-tree-sha1 = "928b8ca9b2791081dc71a51c55347c27c618760f"

@@ -234,10 +240,10 @@ uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
 version = "0.3.3"

 [[OpenSpecFun_jll]]
-deps = ["Libdl", "Pkg"]
-git-tree-sha1 = "65f672edebf3f4e613ddf37db9dcbd7a407e5e90"
+deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"]
+git-tree-sha1 = "d110040968b9afe95c6bd9c6233570b0fe8abd22"
 uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
-version = "0.5.3+1"
+version = "0.5.3+2"

 [[OrderedCollections]]
 deps = ["Random", "Serialization", "Test"]

@@ -246,7 +252,7 @@ uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 version = "1.1.0"

 [[Pkg]]
-deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
+deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"]
 uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"

 [[Printf]]

@@ -273,9 +279,9 @@ version = "0.2.0"
 [[Requires]]
 deps = ["UUIDs"]
-git-tree-sha1 = "999513b7dea8ac17359ed50ae8ea089e4464e35e"
+git-tree-sha1 = "d37400976e98018ee840e0ca4f9d20baa231dc6b"
 uuid = "ae029012-a4dd-5104-9daa-d747884805df"
-version = "1.0.0"
+version = "1.0.1"

 [[SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"

@@ -298,9 +304,9 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 [[SpecialFunctions]]
 deps = ["OpenSpecFun_jll"]
-git-tree-sha1 = "268052ee908b2c086cc0011f528694f02f3e2408"
+git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c"
 uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
-version = "0.9.0"
+version = "0.10.0"

 [[StaticArrays]]
 deps = ["LinearAlgebra", "Random", "Statistics"]

@@ -314,9 +320,9 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 [[StatsBase]]
 deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"]
-git-tree-sha1 = "c53e809e63fe5cf5de13632090bc3520649c9950"
+git-tree-sha1 = "be5c7d45daa449d12868f4466dbf5882242cf2d9"
 uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-version = "0.32.0"
+version = "0.32.1"

 [[Test]]
 deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]

@@ -343,21 +349,23 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 [[ZipFile]]
 deps = ["Libdl", "Printf", "Zlib_jll"]
-git-tree-sha1 = "5de8320a46812da1a8ca98b16a8a4546d44efa62"
+git-tree-sha1 = "8748302cfdec02c4ae9c97b112cf10003f7f767f"
 uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
-version = "0.9.0"
+version = "0.9.1"

 [[Zlib_jll]]
 deps = ["Libdl", "Pkg"]
-git-tree-sha1 = "5618a43055eb09377edca21d19d0e99bce24a9c3"
+git-tree-sha1 = "fd36a6739e256527287c5444960d0266712cd49e"
 uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-version = "1.2.11+7"
+version = "1.2.11+8"

 [[Zygote]]
 deps = ["DiffRules", "FFTW", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
-git-tree-sha1 = "74382bcc4c1e8075e14554da67d75565f8fb7827"
+git-tree-sha1 = "3c65158c0aa0808cdfff8bca2a36430b038aad00"
+repo-rev = "master"
+repo-url = "https://github.com/FluxML/Zygote.jl.git"
 uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
-version = "0.4.5"
+version = "0.4.7"

 [[ZygoteRules]]
 deps = ["MacroTools"]
Project.toml

@@ -1,6 +1,6 @@
 name = "Flux"
 uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-version = "0.10.1"
+version = "0.10.2"

 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"

@@ -27,9 +27,9 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 AbstractTrees = "0.2, 0.3"
 Adapt = "1"
 CodecZlib = "0.5, 0.6"
-Colors = "0.8, 0.9"
+Colors = "0.8, 0.9, 0.10, 0.11"
 CuArrays = "1.6"
-Juno = "0.5, 0.6, 0.7"
+Juno = "0.5, 0.6, 0.7, 0.8"
 MacroTools = "0.3, 0.4, 0.5"
 NNlib = "0.6"
 Reexport = "0.2"

@@ -40,7 +40,10 @@ julia = "1"
 [extras]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [targets]
-test = ["Test", "Documenter"]
+test = ["Test", "Documenter", "IterTools", "LinearAlgebra"]
@@ -14,13 +14,17 @@ makedocs(modules=[Flux, NNlib],
          "Recurrence" => "models/recurrence.md",
          "Regularisation" => "models/regularisation.md",
          "Model Reference" => "models/layers.md",
-         "Advanced Model Building" => "models/advanced.md"],
+         "Advanced Model Building" => "models/advanced.md",
+         "NNlib" => "models/nnlib.md"],
+        "Handling Data" =>
+          ["One-Hot Encoding" => "data/onehot.md",
+           "DataLoader" => "data/dataloader.md"],
         "Training Models" =>
           ["Optimisers" => "training/optimisers.md",
            "Training" => "training/training.md"],
-        "One-Hot Encoding" => "data/onehot.md",
         "GPU Support" => "gpu.md",
         "Saving & Loading" => "saving.md",
         "The Julia Ecosystem" => "ecosystem.md",
         "Performance Tips" => "performance.md",
         "Community" => "community.md"],
        format = Documenter.HTML(assets = ["assets/flux.css"],
@@ -0,0 +1,6 @@
+# DataLoader
+Flux provides the `DataLoader` type in the `Flux.Data` module to handle iteration over mini-batches of data.
+
+```@docs
+Flux.Data.DataLoader
+```
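For readers following the new `DataLoader` page above, a minimal usage sketch (assuming the `Flux.Data` API added in this commit; the array sizes are arbitrary examples):

```julia
using Flux.Data: DataLoader

Xtrain = rand(Float32, 10, 100)  # 100 observations; the last dimension is the observation dimension
Ytrain = rand(Float32, 1, 100)

# 25-observation mini-batches, reshuffled each time iteration restarts
loader = DataLoader(Xtrain, Ytrain, batchsize = 25, shuffle = true)

for (x, y) in loader
    @assert size(x) == (10, 25)
    @assert size(y) == (1, 25)
end
```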
@@ -0,0 +1,21 @@
+# The Julia Ecosystem
+
+One of the main strengths of Julia lies in an ecosystem of packages
+globally providing a rich and consistent user experience.
+
+This is a non-exhaustive list of Julia packages, nicely complementing `Flux` in typical
+machine learning and deep learning workflows:
+
+- [ArgParse.jl](https://github.com/carlobaldassi/ArgParse.jl): package for parsing command-line arguments to Julia programs.
+- [Augmentor.jl](https://github.com/Evizero/Augmentor.jl): a fast image augmentation library in Julia for machine learning.
+- [BSON.jl](https://github.com/JuliaIO/BSON.jl): package for working with the Binary JSON serialisation format.
+- [DataFrames.jl](https://github.com/JuliaData/DataFrames.jl): in-memory tabular data in Julia.
+- [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl): a scientific project assistant software.
+- [MLDatasets.jl](https://github.com/JuliaML/MLDatasets.jl): utility package for accessing common machine learning datasets.
+- [OnlineStats.jl](https://github.com/joshday/OnlineStats.jl): single-pass algorithms for statistics.
+- [Parameters.jl](https://github.com/mauro3/Parameters.jl): types with default field values, keyword constructors and (un-)pack macros.
+- [ProgressMeter.jl](https://github.com/timholy/ProgressMeter.jl): progress meters for long-running computations.
+- [TensorBoardLogger.jl](https://github.com/PhilipVinc/TensorBoardLogger.jl): easy peasy logging to [TensorBoard](https://www.tensorflow.org/tensorboard) in Julia.
+
+
+This tight integration among Julia packages is shown in some of the examples in the [model-zoo](https://github.com/FluxML/model-zoo) repository.
@@ -30,7 +30,7 @@ If you define a structured model, like a `Dense` layer or `Chain`, you just need
 ```julia
 d = Dense(10, 5, σ)
 d = fmap(cu, d)
-d.W # Tracked CuArray
+d.W # CuArray
 d(cu(rand(10))) # CuArray output

 m = Chain(Dense(10, 5, σ), Dense(5, 2), softmax)

@@ -53,7 +53,7 @@ julia> x = rand(10) |> gpu
 0.511655

 julia> m(x)
-Tracked 5-element CuArray{Float32,1}:
+5-element CuArray{Float32,1}:
 -0.30535
 ⋮
 -0.618002
@@ -69,8 +69,8 @@ b = rand(2)
 predict(x) = W*x .+ b

 function loss(x, y)
-  ŷ = predict(x)
-  sum((y .- ŷ).^2)
+  ŷ = predict(x)
+  sum((y .- ŷ).^2)
 end

 x, y = rand(5), rand(2) # Dummy data

@@ -221,3 +221,24 @@ Flux.@functor Affine
 This enables a useful extra set of functionality for our `Affine` layer, such as [collecting its parameters](../training/optimisers.md) or [moving it to the GPU](../gpu.md).
+
+For some more helpful tricks, including parameter freezing, please check out the [advanced usage guide](advanced.md).
+
+## Utility functions
+
+Flux provides some utility functions to help you generate models in an automated fashion.
+
+`outdims` enables you to calculate the spatial output dimensions of layers like `Conv` when applied to input images of a given size.
+Currently limited to the following layers:
+- `Chain`
+- `Dense`
+- `Conv`
+- `Diagonal`
+- `Maxout`
+- `ConvTranspose`
+- `DepthwiseConv`
+- `CrossCor`
+- `MaxPool`
+- `MeanPool`
+
+```@docs
+outdims
+```
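To make the new `outdims` utility concrete, a hedged sketch of sizing a classifier head from the spatial output dimensions, applied layer by layer (assumes the `outdims` methods introduced in this commit; the 28×28 input is an arbitrary example):

```julia
using Flux

conv = Conv((3, 3), 1 => 16)
pool = MaxPool((2, 2))

# channel and batch dimensions are ignored, only spatial sizes flow through
s1 = Flux.outdims(conv, (28, 28))  # (26, 26)
s2 = Flux.outdims(pool, s1)        # (13, 13)

# size the first fully connected layer from the spatial output
classifier = Dense(prod(s2) * 16, 10)
```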
@@ -40,19 +40,6 @@ Maxout
 SkipConnection
 ```

-## Activation Functions
-
-Non-linearities that go between layers of your model. Most of these functions are defined in [NNlib](https://github.com/FluxML/NNlib.jl) but are available by default in Flux.
-
-Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call `σ.(xs)`, `relu.(xs)` and so on.
-
-```@docs
-σ
-relu
-leakyrelu
-elu
-swish
-```
-
 ## Normalisation & Regularisation

@@ -61,19 +48,29 @@ These layers don't affect the structure of the network but may improve training
 ```@docs
 BatchNorm
 Dropout
+Flux.dropout
+AlphaDropout
 LayerNorm
 GroupNorm
 ```

+### Testmode
+
+Many normalisation layers behave differently under training and inference (testing). By default, Flux will automatically determine when a layer evaluation is part of training or inference. Still, depending on your use case, it may be helpful to manually specify when these layers should be treated as being trained or not. For this, Flux provides `testmode!`. When called on a model (e.g. a layer or chain of layers), this function will place the model into the mode specified.
+
+```@docs
+testmode!
+trainmode!
+```
+
 ## Cost Functions
 ```@docs
-mse
-crossentropy
-logitcrossentropy
-binarycrossentropy
-logitbinarycrossentropy
-kldivergence
-poisson
-hinge
+Flux.mse
+Flux.crossentropy
+Flux.logitcrossentropy
+Flux.binarycrossentropy
+Flux.logitbinarycrossentropy
+Flux.kldivergence
+Flux.poisson
+Flux.hinge
 ```
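A small sketch of the manual mode switching described in the new Testmode section (assuming the `testmode!`/`trainmode!` functions added in this commit):

```julia
using Flux

m = Chain(Dense(10, 5), Dropout(0.5), Dense(5, 2))

testmode!(m)         # force inference mode: Dropout becomes a no-op
y = m(rand(Float32, 10))

trainmode!(m)        # force training mode
testmode!(m, :auto)  # return to automatic detection (the default)
```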
@@ -0,0 +1,37 @@
+# NNlib
+Flux re-exports all of the functions exported by the [NNlib](https://github.com/FluxML/NNlib.jl) package.
+
+## Activation Functions
+Non-linearities that go between layers of your model. Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call `σ.(xs)`, `relu.(xs)` and so on.
+
+```@docs
+NNlib.elu
+NNlib.gelu
+NNlib.leakyrelu
+NNlib.logcosh
+NNlib.logsigmoid
+NNlib.sigmoid
+NNlib.relu
+NNlib.selu
+NNlib.softplus
+NNlib.softsign
+NNlib.swish
+```
+
+## Softmax
+```@docs
+NNlib.softmax
+NNlib.logsoftmax
+```
+
+## Pooling
+```@docs
+NNlib.maxpool
+NNlib.meanpool
+```
+
+## Convolution
+```@docs
+NNlib.conv
+NNlib.depthwiseconv
+```
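Since the page above stresses that activations are scalar functions, a one-line sketch of broadcasting them over arrays (plain Julia broadcasting, nothing Flux-specific):

```julia
using Flux  # re-exports NNlib

xs = Float32[-2, -1, 0, 1, 2]

relu.(xs)       # elementwise: max(0, x)
leakyrelu.(xs)  # small slope for negative inputs instead of a hard zero
σ.(xs)          # elementwise sigmoid, each value in (0, 1)
softmax(xs)     # softmax consumes the whole vector, so no broadcasting dot
```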
@@ -31,7 +31,7 @@ julia> params(m)
 param([0.0, 0.0, 0.0, 0.0, 0.0])

 julia> sum(norm, params(m))
-26.01749952921026 (tracked)
+26.01749952921026
 ```

 Here's a larger example with a multi-layer perceptron.

@@ -52,7 +52,7 @@ One can also easily add per-layer regularisation via the `activations` function:
 ```julia
 julia> using Flux: activations

-julia> c = Chain(Dense(10,5,σ),Dense(5,2),softmax)
+julia> c = Chain(Dense(10, 5, σ), Dense(5, 2), softmax)
 Chain(Dense(10, 5, σ), Dense(5, 2), softmax)

 julia> activations(c, rand(10))
@@ -21,7 +21,7 @@ grads = gradient(() -> loss(x, y), θ)
 We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here's one way to do that:

 ```julia
-using Flux: update!
+using Flux.Optimise: update!

 η = 0.1 # Learning Rate
 for p in (W, b)

@@ -46,6 +46,7 @@ An optimiser `update!` accepts a parameter and a gradient, and updates the parameter
 All optimisers return an object that, when passed to `train!`, will update the parameters passed to it.

 ```@docs
+Flux.Optimise.update!
 Descent
 Momentum
 Nesterov

@@ -61,7 +62,7 @@ ADAMW

 ## Optimiser Interface

-Flux's optimsers are built around a `struct` that holds all the optimiser parameters along with a definition of how to apply the update rule associated with it. We do this via the `apply!` function which takes the optimiser as the first argument followed by the parameter and its corresponding gradient.
+Flux's optimisers are built around a `struct` that holds all the optimiser parameters along with a definition of how to apply the update rule associated with it. We do this via the `apply!` function which takes the optimiser as the first argument followed by the parameter and its corresponding gradient.

 In this manner Flux also allows one to create custom optimisers to be used seamlessly. Let's work this with a simple example.

@@ -99,15 +100,15 @@ Flux internally calls on this function via the `update!` function. It shares the

 ## Composing Optimisers

-Flux defines a special kind of optimiser called simply as `Optimiser` which takes in a arbitrary optimisers as input. Its behaviour is similar to the usual optimisers, but differs in that it acts by calling the optimisers listed in it sequentially. Each optimiser produces a modified gradient
+Flux defines a special kind of optimiser simply called `Optimiser` which takes in arbitrary optimisers as input. Its behaviour is similar to the usual optimisers, but differs in that it acts by calling the optimisers listed in it sequentially. Each optimiser produces a modified gradient
 that will be fed into the next, and the resultant update will be applied to the parameter as usual. A classic use case is where adding decays is desirable. Flux defines some basic decays including `ExpDecay`, `InvDecay` etc.

 ```julia
 opt = Optimiser(ExpDecay(0.001, 0.1, 1000, 1e-4), Descent())
 ```

-Here we apply exponential decay to the `Descent` optimser. The defaults of `ExpDecay` say that its learning rate will be decayed every 1000 steps.
-It is then applied like any optimser.
+Here we apply exponential decay to the `Descent` optimiser. The defaults of `ExpDecay` say that its learning rate will be decayed every 1000 steps.
+It is then applied like any optimiser.

 ```julia
 w = randn(10, 10)
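To ground the `apply!`-based interface described on this page, a hedged sketch of a custom optimiser (a plain momentum rule written against that interface; the name `MyMomentum` is hypothetical and not part of this commit):

```julia
using Flux
import Flux.Optimise: apply!

mutable struct MyMomentum
  eta::Float64
  rho::Float64
  velocity::IdDict   # per-parameter state, keyed by the parameter array
end

MyMomentum(eta = 0.01, rho = 0.9) = MyMomentum(eta, rho, IdDict())

# apply! returns the step that update! subtracts from the parameter
function apply!(o::MyMomentum, x, Δ)
  v = get!(() -> zero(x), o.velocity, x)
  @. v = o.rho * v + o.eta * Δ
  return v
end
```

An instance of it can then be passed to `Flux.train!` like any built-in optimiser.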
@@ -7,10 +7,10 @@ To actually train a model we need four things:
 * A collection of data points that will be provided to the objective function.
 * An [optimiser](optimisers.md) that will update the model parameters appropriately.

-With these we can call `Flux.train!`:
+With these we can call `train!`:

-```julia
-Flux.train!(objective, params, data, opt)
+```@docs
+Flux.Optimise.train!
 ```

 There are plenty of examples in the [model zoo](https://github.com/FluxML/model-zoo).

@@ -58,7 +58,8 @@ data = [(x, y)]
 ```julia
 data = [(x, y), (x, y), (x, y)]
 # Or equivalently
-data = Iterators.repeated((x, y), 3)
+using IterTools: ncycle
+data = ncycle([(x, y)], 3)
 ```

 It's common to load the `x`s and `y`s separately. In this case you can use `zip`:

@@ -69,6 +70,14 @@ ys = [rand( 10), rand( 10), rand( 10)]
 data = zip(xs, ys)
 ```

+Training data can be conveniently partitioned for mini-batch training using the [`Flux.Data.DataLoader`](@ref) type:
+
+```julia
+X = rand(28, 28, 60000)
+Y = rand(0:9, 60000)
+data = DataLoader(X, Y, batchsize=128)
+```
+
 Note that, by default, `train!` only loops over the data once (a single "epoch").
 A convenient way to run multiple epochs from the REPL is provided by `@epochs`.

@@ -122,7 +131,7 @@ An example follows that works similar to the default `Flux.train` but with no callbacks.
 You don't need callbacks if you just code the calls to your functions directly into the loop.
 E.g. in the places marked with comments.

-```
+```julia
 function my_custom_train!(loss, ps, data, opt)
   ps = Params(ps)
   for d in data
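Pulling the pieces of this page together, a minimal end-to-end training sketch (assumes the 0.10-era API shown in this commit; the model and data are toy placeholders):

```julia
using Flux
using Flux.Data: DataLoader

X = rand(Float32, 4, 64)   # 64 toy observations with 4 features each
Y = rand(Float32, 1, 64)
data = DataLoader(X, Y, batchsize = 16, shuffle = true)

m = Chain(Dense(4, 8, relu), Dense(8, 1))
loss(x, y) = Flux.mse(m(x), y)
opt = Descent(0.1)

# run 5 epochs over the DataLoader
Flux.@epochs 5 Flux.train!(loss, params(m), data, opt)
```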
@@ -7,11 +7,12 @@ using Zygote, MacroTools, Juno, Reexport, Statistics, Random
 using MacroTools: @forward
 @reexport using NNlib
 using Zygote: Params, @adjoint, gradient, pullback, @nograd

 export gradient

 export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
-       SkipConnection, params, fmap, cpu, gpu, f32, f64
+       SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!

 include("optimise/Optimise.jl")
 using .Optimise
@@ -3,6 +3,9 @@ module Data
 import ..Flux
 import SHA

+using Random: shuffle!
+using Base: @propagate_inbounds
+
 export CMUDict, cmudict

 deps(path...) = joinpath(@__DIR__, "..", "..", "deps", path...)

@@ -26,6 +29,9 @@ function __init__()
   mkpath(deps())
 end

+include("dataloader.jl")
+export DataLoader
+
 include("mnist.jl")
 export MNIST

@@ -42,4 +48,7 @@ using .Sentiment
 include("iris.jl")
 export Iris

+include("housing.jl")
+export Housing
+
 end
@@ -0,0 +1,88 @@
+# Adapted from Knet's src/data.jl (author: Deniz Yuret)
+
+struct DataLoader
+    data
+    batchsize::Int
+    nobs::Int
+    partial::Bool
+    imax::Int
+    indices::Vector{Int}
+    shuffle::Bool
+end
+
+"""
+    DataLoader(data...; batchsize=1, shuffle=false, partial=true)
+
+An object that iterates over mini-batches of `data`, each mini-batch containing `batchsize` observations
+(except possibly the last one).
+
+Takes as input one or more data tensors, e.g. X in unsupervised learning, X and Y in
+supervised learning. The last dimension in each tensor is considered to be the observation
+dimension.
+
+If `shuffle=true`, shuffles the observations each time iterations are re-started.
+If `partial=false`, drops the last mini-batch if it is smaller than the batchsize.
+
+Example usage:
+
+    Xtrain = rand(10, 100)
+    dtrain = DataLoader(Xtrain, batchsize=2)
+    # iterate over 50 mini-batches
+    for x in dtrain
+        @assert size(x) == (10, 2)
+        ...
+    end
+
+    Xtrain = rand(10, 100)
+    Ytrain = rand(100)
+    dtrain = DataLoader(Xtrain, Ytrain, batchsize=2, shuffle=true)
+    for epoch in 1:100
+        for (x, y) in dtrain
+            @assert size(x) == (10, 2)
+            @assert size(y) == (2,)
+            ...
+        end
+    end
+
+    # train for 10 epochs
+    using IterTools: ncycle
+    Flux.train!(loss, ps, ncycle(dtrain, 10), opt)
+"""
+function DataLoader(data...; batchsize=1, shuffle=false, partial=true)
+    length(data) > 0 || throw(ArgumentError("Need at least one data input"))
+    batchsize > 0 || throw(ArgumentError("Need positive batchsize"))
+
+    nx = size(data[1])[end]
+    for i=2:length(data)
+        nx != size(data[i])[end] && throw(DimensionMismatch("All data should contain same number of observations"))
+    end
+    if nx < batchsize
+        @warn "Number of data points less than batchsize, decreasing the batchsize to $nx"
+        batchsize = nx
+    end
+    imax = partial ? nx : nx - batchsize + 1
+    ids = 1:min(nx, batchsize)
+    DataLoader(data, batchsize, nx, partial, imax, [1:nx;], shuffle)
+end
+
+getdata(x::AbstractArray, ids) = x[(Base.Colon() for _=1:ndims(x)-1)..., ids]
+
+@propagate_inbounds function Base.iterate(d::DataLoader, i=0)  # returns data in d.indices[i+1:i+batchsize]
+    i >= d.imax && return nothing
+    if d.shuffle && i == 0
+        shuffle!(d.indices)
+    end
+    nexti = min(i + d.batchsize, d.nobs)
+    ids = d.indices[i+1:nexti]
+    if length(d.data) == 1
+        batch = getdata(d.data[1], ids)
+    else
+        batch = ((getdata(x, ids) for x in d.data)...,)
+    end
+    return (batch, nexti)
+end
+
+function Base.length(d::DataLoader)
+    n = d.nobs / d.batchsize
+    d.partial ? ceil(Int,n) : floor(Int,n)
+end
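A quick worked example of the `length` logic above, with hypothetical numbers:

```julia
# nobs = 100 observations, batchsize = 32:
ceil(Int, 100 / 32)   # 4 batches when partial = true (the last batch holds 4 observations)
floor(Int, 100 / 32)  # 3 batches when partial = false (the trailing 4 observations are dropped)
```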
@@ -0,0 +1,136 @@
+"""
+1. Title: Boston Housing Data
+
+2. Sources:
+   (a) Origin: This dataset was taken from the StatLib library which is
+       maintained at Carnegie Mellon University.
+   (b) Creator: Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the
+       demand for clean air', J. Environ. Economics & Management,
+       vol.5, 81-102, 1978.
+   (c) Date: July 7, 1993
+
+3. Number of Instances: 506
+
+4. Number of Attributes: 13 continuous attributes (including "class"
+   attribute "MEDV"), 1 binary-valued attribute.
+
+5. Attribute Information:
+
+    1. CRIM      per capita crime rate by town
+    2. ZN        proportion of residential land zoned for lots over
+                 25,000 sq.ft.
+    3. INDUS     proportion of non-retail business acres per town
+    4. CHAS      Charles River dummy variable (= 1 if tract bounds
+                 river; 0 otherwise)
+    5. NOX       nitric oxides concentration (parts per 10 million)
+    6. RM        average number of rooms per dwelling
+    7. AGE       proportion of owner-occupied units built prior to 1940
+    8. DIS       weighted distances to five Boston employment centres
+    9. RAD       index of accessibility to radial highways
+    10. TAX      full-value property-tax rate per 10,000 dollars
+    11. PTRATIO  pupil-teacher ratio by town
+    12. B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks
+                 by town
+    13. LSTAT    % lower status of the population
+    14. MEDV     Median value of owner-occupied homes in 1000's of dollars
+
+Downloaded From: https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data
+
+"""
+module Housing
+
+using DelimitedFiles
+using ..Data: deps, download_and_verify
+
+#Uncomment if package exists
+#const cache_prefix = "https://cache.julialang.org/"
+const cache_prefix = ""
+
+function load()
+    isfile(deps("housing.data")) && return
+
+    @info "Downloading the Boston housing Dataset"
+    download_and_verify("$(cache_prefix)https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data",
+                        deps("housing.data"),
+                        "baadf72995725d76efe787b664e1f083388c79ba21ef9a7990d87f774184735a")
+
+    #@info "Download complete. Working on the files"
+    path = deps()
+    isfile(deps("housing.data")) && touch(joinpath(path, "tempfile.data"))
+    open(joinpath(path, "tempfile.data"), "a") do fout
+        open(deps("housing.data"), "r") do fin
+            for line in eachline(fin)
+                line = replace(lstrip(line), r" +" => s",")
+                println(fout, line)
+            end
+        end
+    end
+    mv(joinpath(path, "tempfile.data"), deps("housing.data"), force=true)
+end
+
+"""
+Gets the targets for the Boston housing dataset, a 506 element array listing the targets for each example
+
+```jldoctest
+julia> using Flux
+
+julia> target = Flux.Data.Housing.targets()
+
+julia> summary(target)
+506×1 Array{Float64,2}
+
+julia> target[1]
+24.0
+```
+"""
+function targets()
+    load()
+    housing = readdlm(deps("housing.data"), ',')
+    reshape(Vector{Float64}(housing[1:end,end]), (506, 1))
+end
+
+
+"""
+Gets the names of the features provided in the dataset
+
+"""
+function feature_names()
+    ["crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio","b","lstat"]
+end
+
+
+"""
+Gets the features of the Boston Housing Dataset. This is a 506x13 Matrix of Float64 datatypes.
+The values are in the order ["crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio","b","lstat"].
+It has 506 examples.
+
+```jldoctest
+julia> using Flux
+
+julia> features = Flux.Data.Housing.features()
+
+julia> summary(features)
+506×13 Array{Float64,2}
+
+julia> features[1, :]
+13-element Array{Float64,1}:
+0.00632
+18.0
+2.31
+0.0
+0.538
+⋮
+296.0
+15.3
+396.9
+4.98
+```
+"""
+function features()
+    load()
+    housing = readdlm(deps("housing.data"), ',')
+    Matrix{Float64}(housing[1:end, 1:13])
+end
+
+
+end
@@ -28,7 +28,6 @@ function load()
 end

 """
-
     labels()

 Get the labels of the iris dataset, a 150 element array of strings listing the

@@ -53,7 +52,6 @@ function labels()
 end

 """
-
     features()

 Get the features of the iris dataset. This is a 4x150 matrix of Float64
@@ -39,6 +39,38 @@ end

 trainable(m) = functor(m)[1]

+"""
+    testmode!(m, mode = true)
+
+Set a layer or model's test mode (see below).
+Using `:auto` mode will treat any gradient computation as training.
+
+_Note_: if you manually set a model into test mode, you need to manually place
+it back into train mode during training phase.
+
+Possible values include:
+- `false` for training
+- `true` for testing
+- `:auto` or `nothing` for Flux to detect the mode automatically
+"""
+testmode!(m, mode = true) = m
+
+"""
+    trainmode!(m, mode = true)
+
+Set a layer or model's train mode (see below).
+Symmetric to [`testmode!`](@ref) (i.e. `trainmode!(m, mode) == testmode!(m, !mode)`).
+
+_Note_: if you manually set a model into train mode, you need to manually place
+it into test mode during testing phase.
+
+Possible values include:
+- `true` for training
+- `false` for testing
+- `:auto` or `nothing` for Flux to detect the mode automatically
+"""
+trainmode!(m, mode = true) = mode isa Bool ? testmode!(m, !mode) : testmode!(m, mode)
+
 params!(p::Params, x::AbstractArray{<:Number}, seen = IdSet()) = push!(p, x)

 function params!(p::Params, x, seen = IdSet())
@@ -33,12 +33,25 @@ applychain(fs::Tuple, x) = applychain(tail(fs), first(fs)(x))

 Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]...)

+testmode!(m::Chain, mode = true) = (map(x -> testmode!(x, mode), m.layers); m)
+
 function Base.show(io::IO, c::Chain)
   print(io, "Chain(")
   join(io, c.layers, ", ")
   print(io, ")")
 end

+"""
+    outdims(c::Chain, isize)
+
+Calculate the output dimensions given the input dimensions, `isize`.
+
+```julia
+m = Chain(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32))
+outdims(m, (10, 10)) == (6, 6)
+```
+"""
+outdims(c::Chain, isize) = foldl(∘, map(l -> (x -> outdims(l, x)), c.layers))(isize)
+
 # This is a temporary and naive implementation
 # it might be replaced in the future for better performance

@@ -116,6 +129,19 @@ end
 (a::Dense{<:Any,W})(x::AbstractArray{<:AbstractFloat}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))

+"""
+    outdims(l::Dense, isize)
+
+Calculate the output dimensions given the input dimensions, `isize`.
+
+```julia
+m = Dense(10, 5)
+outdims(m, (5, 2)) == (5,)
+outdims(m, (10,)) == (5,)
+```
+"""
+outdims(l::Dense, isize) = (size(l.W)[1],)
+
 """
     Diagonal(in::Integer)

@@ -145,6 +171,7 @@ function Base.show(io::IO, l::Diagonal)
   print(io, "Diagonal(", length(l.α), ")")
 end

+outdims(l::Diagonal, isize) = (length(l.α),)

 """
     Maxout(over)

@@ -193,6 +220,8 @@ function (mo::Maxout)(input::AbstractArray)
   mapreduce(f -> f(input), (acc, out) -> max.(acc, out), mo.over)
 end

+outdims(l::Maxout, isize) = outdims(first(l.over), isize)
+
 """
     SkipConnection(layers, connection)
@@ -1,4 +1,9 @@
-using NNlib: conv, ∇conv_data, depthwiseconv
+using NNlib: conv, ∇conv_data, depthwiseconv, output_size
+
+# pad dims of x with dims of y until ndims(x) == ndims(y)
+_paddims(x::Tuple, y::Tuple) = (x..., y[(end - (length(y) - length(x) - 1)):end]...)
+
+_convtransoutdims(isize, ksize, ssize, dsize, pad) = (isize .- 1).*ssize .+ 1 .+ (ksize .- 1).*dsize .- (pad[1:2:end] .+ pad[2:2:end])

 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)

@@ -17,7 +22,7 @@ Example: Applying Conv layer to a 1-channel input using a 2x2 window size,
     out = 16
     Conv((2, 2), 1=>16, relu)

-Data should be stored in WHCN order (width, height, # channels, # batches).
+Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.

@@ -68,6 +73,21 @@ end
 (a::Conv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))

+"""
+    outdims(l::Conv, isize::Tuple)
+
+Calculate the output dimensions given the input dimensions, `isize`.
+Batch size and channel size are ignored as per `NNlib.jl`.
+
+```julia
+m = Conv((3, 3), 3 => 16)
+outdims(m, (10, 10)) == (8, 8)
+outdims(m, (10, 10, 1, 3)) == (8, 8)
+```
+"""
+outdims(l::Conv, isize) =
+  output_size(DenseConvDims(_paddims(isize, size(l.weight)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation))
+
 """
     ConvTranspose(size, in=>out)
     ConvTranspose(size, in=>out, relu)

@@ -140,6 +160,9 @@ end
 (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))

+outdims(l::ConvTranspose{N}, isize) where N = _convtransoutdims(isize[1:2], size(l.weight)[1:N], l.stride, l.dilation, l.pad)
+
 """
     DepthwiseConv(size, in=>out)
     DepthwiseConv(size, in=>out, relu)

@@ -204,6 +227,9 @@ end
 (a::DepthwiseConv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))

+outdims(l::DepthwiseConv, isize) =
+  output_size(DepthwiseConvDims(_paddims(isize, (1, 1, size(l.weight)[end], 1)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation))
+
 """
     CrossCor(size, in=>out)
     CrossCor(size, in=>out, relu)

@@ -275,6 +301,9 @@ end
 (a::CrossCor{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))

+outdims(l::CrossCor, isize) =
+  output_size(DenseConvDims(_paddims(isize, size(l.weight)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation))
+
 """
     MaxPool(k)

@@ -304,6 +333,8 @@ function Base.show(io::IO, m::MaxPool)
   print(io, "MaxPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")")
 end

+outdims(l::MaxPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.k..., 1, 1)), l.k; stride = l.stride, padding = l.pad))
+
 """
     MeanPool(k)

@@ -331,3 +362,5 @@ end
 function Base.show(io::IO, m::MeanPool)
   print(io, "MeanPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")")
 end
+
+outdims(l::MeanPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.k..., 1, 1)), l.k; stride = l.stride, padding = l.pad))
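For reference, the size relations that `output_size` and `_convtransoutdims` encode above, written out as standalone helpers with a worked check (a hedged sketch; the helper names are hypothetical, and only stride/pad/dilation are modelled):

```julia
# dense conv / cross-correlation: o = floor((i + 2p - d*(k - 1) - 1) / s) + 1
conv_out(i, k; stride = 1, pad = 0, dilation = 1) =
    fld(i + 2pad - dilation * (k - 1) - 1, stride) + 1

conv_out(10, 3)              # 8, matching the outdims(Conv((3, 3), ...)) docstring example
conv_out(10, 3, stride = 2)  # 4

# transposed conv inverts the relation, as in _convtransoutdims
convtrans_out(i, k; stride = 1, pad = 0, dilation = 1) =
    (i - 1) * stride + 1 + (k - 1) * dilation - 2pad

convtrans_out(4, 3, stride = 2)  # 9
```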
@@ -2,11 +2,23 @@ istraining() = false

 @adjoint istraining() = true, _ -> nothing

+_isactive(m) = isnothing(m.active) ? istraining() : m.active
+
 _dropout_shape(s, ::Colon) = size(s)
 _dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(size(s)))...)

 _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0)

+"""
+    dropout(x, p; dims = :)
+
+Dropout function. For each input, either sets that input to `0` (with probability
+`p`) or scales it by `1/(1-p)`. The `dims` argument is to specify the unbroadcasted
+dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is
+used as a regularisation, i.e. it reduces overfitting during training.
+
+See also [`Dropout`](@ref).
+"""
+dropout(x, p; dims = :) = x
+
 @adjoint function dropout(x, p; dims = :)

@@ -18,22 +30,28 @@ end
 """
     Dropout(p, dims = :)

-A Dropout layer. For each input, either sets that input to `0` (with probability
-`p`) or scales it by `1/(1-p)`. The `dims` argument is to specified the unbroadcasted
-dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is
-used as a regularisation, i.e. it reduces overfitting during training. see also [`dropout`](@ref).
+A Dropout layer. In the forward pass, applies the [`dropout`](@ref) function on the input.
+
+Does nothing to the input once [`testmode!`](@ref) is false.
 """
 mutable struct Dropout{F,D}
   p::F
   dims::D
+  active::Union{Bool, Nothing}
 end

 function Dropout(p; dims = :)
   @assert 0 ≤ p ≤ 1
-  Dropout{typeof(p),typeof(dims)}(p, dims)
+  Dropout{typeof(p),typeof(dims)}(p, dims, nothing)
 end

-(a::Dropout)(x) = dropout(x, a.p; dims = a.dims)
+function (a::Dropout)(x)
+  _isactive(a) || return x
+  return dropout(x, a.p; dims = a.dims)
+end
+
+testmode!(m::Dropout, mode = true) =
+  (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)

 function Base.show(io::IO, d::Dropout)
   print(io, "Dropout(", d.p)

@@ -43,20 +61,24 @@ end
 """
     AlphaDropout(p)

 A dropout layer. It is used in Self-Normalizing Neural Networks.
 (https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf)
 The AlphaDropout layer ensures that mean and variance of activations remains the same as before.
+
+Does nothing to the input once [`testmode!`](@ref) is false.
 """
 mutable struct AlphaDropout{F}
   p::F
-  function AlphaDropout(p)
+  active::Union{Bool, Nothing}
+  function AlphaDropout(p, active = nothing)
     @assert 0 ≤ p ≤ 1
-    new{typeof(p)}(p)
+    new{typeof(p)}(p, active)
   end
 end

 function (a::AlphaDropout)(x)
-  istraining() || return x
+  _isactive(a) || return x
   λ = eltype(x)(1.0507009873554804934193349852946)
   α = eltype(x)(1.6732632423543772848170429916717)
   α1 = eltype(x)(-λ*α)

@@ -68,6 +90,9 @@ function (a::AlphaDropout)(x)
   return x
 end

+testmode!(m::AlphaDropout, mode = true) =
+  (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
+
 """
     LayerNorm(h::Integer)

@@ -106,6 +131,8 @@ it's the usual channel dimension.)
 shifts them to have a new mean and variance (corresponding to the learnable,
 per-channel `bias` and `scale` parameters).

+Use [`testmode!`](@ref) during inference.
+
 See [Batch Normalization: Accelerating Deep Network Training by Reducing
 Internal Covariate Shift](https://arxiv.org/pdf/1502.03167.pdf).

@@ -127,12 +154,13 @@ mutable struct BatchNorm{F,V,W,N}
   σ²::W  # moving std
   ϵ::N
   momentum::N
+  active::Union{Bool, Nothing}
 end

 BatchNorm(chs::Integer, λ = identity;
           initβ = (i) -> zeros(Float32, i), initγ = (i) -> ones(Float32, i), ϵ = 1f-5, momentum = 0.1f0) =
   BatchNorm(λ, initβ(chs), initγ(chs),
-            zeros(chs), ones(chs), ϵ, momentum)
+            zeros(chs), ones(chs), ϵ, momentum, nothing)

 trainable(bn::BatchNorm) = (bn.β, bn.γ)

@@ -145,7 +173,7 @@ function (BN::BatchNorm)(x)
   m = div(prod(size(x)), channels)
   γ = reshape(BN.γ, affine_shape...)
   β = reshape(BN.β, affine_shape...)
-  if !istraining()
+  if !_isactive(BN)
     μ = reshape(BN.μ, affine_shape...)
     σ² = reshape(BN.σ², affine_shape...)
     ϵ = BN.ϵ

@@ -170,6 +198,9 @@ end

 @functor BatchNorm

+testmode!(m::BatchNorm, mode = true) =
+  (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
+
 function Base.show(io::IO, l::BatchNorm)
   print(io, "BatchNorm($(join(size(l.β), ", "))")
   (l.λ == identity) || print(io, ", λ = $(l.λ)")

@@ -193,6 +224,8 @@ it's the usual channel dimension.)
 shifts them to have a new mean and variance (corresponding to the learnable,
 per-channel `bias` and `scale` parameters).

+Use [`testmode!`](@ref) during inference.
+
 See [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022).

 Example:

@@ -215,12 +248,13 @@ mutable struct InstanceNorm{F,V,W,N}
   σ²::W  # moving std
   ϵ::N
   momentum::N
+  active::Union{Bool, Nothing}
 end

 InstanceNorm(chs::Integer, λ = identity;
              initβ = (i) -> zeros(Float32, i), initγ = (i) -> ones(Float32, i), ϵ = 1f-5, momentum = 0.1f0) =
   InstanceNorm(λ, initβ(chs), initγ(chs),
-               zeros(chs), ones(chs), ϵ, momentum)
+               zeros(chs), ones(chs), ϵ, momentum, nothing)

 trainable(in::InstanceNorm) = (in.β, in.γ)

@@ -237,7 +271,7 @@ function (in::InstanceNorm)(x)
   m = div(prod(size(x)), c*bs)
   γ, β = expand_inst(in.γ, affine_shape), expand_inst(in.β, affine_shape)

-  if !istraining()
+  if !_isactive(in)
     μ = expand_inst(in.μ, affine_shape)
     σ² = expand_inst(in.σ², affine_shape)
     ϵ = in.ϵ

@@ -263,6 +297,9 @@ end

 @functor InstanceNorm

+testmode!(m::InstanceNorm, mode = true) =
+  (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
+
 function Base.show(io::IO, l::InstanceNorm)
   print(io, "InstanceNorm($(join(size(l.β), ", "))")
   (l.λ == identity) || print(io, ", λ = $(l.λ)")

@@ -283,6 +320,8 @@ For an array of N dimensions, the (N-1)th index is the channel dimension.
 ``G`` is the number of groups along which the statistics would be computed.
 The number of channels must be an integer multiple of the number of groups.

+Use [`testmode!`](@ref) during inference.
+
 Example:
 ```
 m = Chain(Conv((3,3), 1=>32, leakyrelu;pad = 1),

@@ -300,12 +339,13 @@ mutable struct GroupNorm{F,V,W,N,T}
   σ²::W  # moving std
   ϵ::N
   momentum::N
+  active::Union{Bool, Nothing}
 end

 GroupNorm(chs::Integer, G::Integer, λ = identity;
           initβ = (i) -> zeros(Float32, i), initγ = (i) -> ones(Float32, i), ϵ = 1f-5, momentum = 0.1f0) =
   GroupNorm(G, λ, initβ(chs), initγ(chs),
-            zeros(G,1), ones(G,1), ϵ, momentum)
+            zeros(G,1), ones(G,1), ϵ, momentum, nothing)

 trainable(gn::GroupNorm) = (gn.β, gn.γ)

@@ -329,7 +369,7 @@ function(gn::GroupNorm)(x)
   β = reshape(gn.β, affine_shape...)

   y = reshape(x,((size(x))[1:end-2]...,channels_per_group,groups,batches))
-  if !istraining()
+  if !_isactive(gn)
     og_shape = size(x)
     μ = reshape(gn.μ, μ_affine_shape...) # Shape : (1,1,...C/G,G,1)
     σ² = reshape(gn.σ², μ_affine_shape...) # Shape : (1,1,...C/G,G,1)

@@ -360,6 +400,9 @@ end

 @functor GroupNorm

+testmode!(m::GroupNorm, mode = true) =
+  (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
+
 function Base.show(io::IO, l::GroupNorm)
   print(io, "GroupNorm($(join(size(l.β), ", "))")
   (l.λ == identity) || print(io, ", λ = $(l.λ)")
@@ -45,8 +45,7 @@ Base.show(io::IO, m::Recur) = print(io, "Recur(", m.cell, ")")
 """
     reset!(rnn)

-Reset the hidden state of a recurrent layer back to its original value. See also
-`truncate!`.
+Reset the hidden state of a recurrent layer back to its original value.

 Assuming you have a `Recur` layer `rnn`, this is roughly equivalent to
@ -1,10 +1,12 @@
|
|||
using CuArrays
|
||||
using NNlib: logsoftmax, logσ
|
||||
|
||||
# Cost functions
|
||||
"""
|
||||
mse(ŷ, y)
|
||||
|
||||
Return the mean squared error `sum((ŷ .- y).^2) / length(y)`.
|
||||
"""
|
||||
mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y)
|
||||
|
||||
|
||||
function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)
|
||||
return -sum(y .* log.(ŷ)) * 1 // size(y, 2)
|
||||
end
|
||||
|
@ -17,10 +19,26 @@ function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Abstr
|
|||
return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
|
||||
end
|
||||
|
||||
"""
|
||||
crossentropy(ŷ, y; weight=1)
|
||||
|
||||
Return the crossentropy computed as `-sum(y .* log.(ŷ) .* weight) / size(y, 2)`.
|
||||
|
||||
See also [`logitcrossentropy`](@ref), [`binarycrossentropy`](@ref).
|
||||
"""
|
||||
crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight)
|
||||
|
||||
function logitcrossentropy(logŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
|
||||
return -sum(y .* logsoftmax(logŷ) .* weight) * 1 // size(y, 2)
|
||||
"""
|
||||
logitcrossentropy(ŷ, y; weight=1)
|
||||
|
||||
Return the crossentropy computed after a [softmax](@ref) operation:
|
||||
|
||||
-sum(y .* logsoftmax(ŷ) .* weight) / size(y, 2)
|
||||
|
||||
See also [`crossentropy`](@ref), [`binarycrossentropy`](@ref).
|
||||
"""
|
||||
function logitcrossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
|
||||
return -sum(y .* logsoftmax(ŷ) .* weight) * 1 // size(y, 2)
|
||||
end
|
||||
|
||||
"""
|
||||
|
@ -28,11 +46,7 @@ end
|
|||
|
||||
Return `-y*log(ŷ + ϵ) - (1-y)*log(1-ŷ + ϵ)`. The ϵ term provides numerical stability.
|
||||
|
||||
julia> binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0.])
|
||||
3-element Array{Float64,1}:
|
||||
1.4244
|
||||
0.352317
|
||||
0.86167
|
||||
Typically, the prediction `ŷ` is given by the output of a [`sigmoid`](@ref) activation.
|
||||
"""
|
||||
binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
|
||||
|
||||
|
@ -40,44 +54,42 @@ binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ
|
|||
CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
|
||||
|
||||
"""
|
||||
logitbinarycrossentropy(logŷ, y)
|
||||
logitbinarycrossentropy(ŷ, y)
|
||||
|
||||
`logitbinarycrossentropy(logŷ, y)` is mathematically equivalent to `binarycrossentropy(σ(logŷ), y)`
|
||||
`logitbinarycrossentropy(ŷ, y)` is mathematically equivalent to `binarycrossentropy(σ(ŷ), y)`
|
||||
but it is more numerically stable.
|
||||
|
||||
julia> logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0.])
|
||||
3-element Array{Float64,1}:
|
||||
1.4244
|
||||
0.352317
|
||||
0.86167
|
||||
See also [`binarycrossentropy`](@ref), [`sigmoid`](@ref), [`logsigmoid`](@ref).
|
||||
"""
|
||||
logitbinarycrossentropy(logŷ, y) = (1 - y)*logŷ - logσ(logŷ)
|
||||
logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
|
||||
|
||||
# Re-definition to fix interaction with CuArrays.
|
||||
CuArrays.@cufunc logitbinarycrossentropy(logŷ, y) = (1 - y)*logŷ - logσ(logŷ)
|
||||
CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
|
||||
|
||||
"""
|
||||
normalise(x::AbstractArray; dims=1)
|
||||
normalise(x; dims=1)
|
||||
|
||||
Normalises `x` to mean 0 and standard deviation 1, across the dimensions given by `dims`. Defaults to normalising over columns.
|
||||
|
||||
julia> a = reshape(collect(1:9), 3, 3)
|
||||
3×3 Array{Int64,2}:
|
||||
1 4 7
|
||||
2 5 8
|
||||
3 6 9
|
||||
```julia-repl
|
||||
julia> a = reshape(collect(1:9), 3, 3)
|
||||
3×3 Array{Int64,2}:
|
||||
1 4 7
|
||||
2 5 8
|
||||
3 6 9
|
||||
|
||||
julia> normalise(a)
|
||||
3×3 Array{Float64,2}:
|
||||
-1.22474 -1.22474 -1.22474
|
||||
0.0 0.0 0.0
|
||||
1.22474 1.22474 1.22474
|
||||
julia> normalise(a)
|
||||
3×3 Array{Float64,2}:
|
||||
-1.22474 -1.22474 -1.22474
|
||||
0.0 0.0 0.0
|
||||
1.22474 1.22474 1.22474
|
||||
|
||||
julia> normalise(a, dims=2)
|
||||
3×3 Array{Float64,2}:
|
||||
-1.22474 0.0 1.22474
|
||||
-1.22474 0.0 1.22474
|
||||
-1.22474 0.0 1.22474
|
||||
julia> normalise(a, dims=2)
|
||||
3×3 Array{Float64,2}:
|
||||
-1.22474 0.0 1.22474
|
||||
-1.22474 0.0 1.22474
|
||||
-1.22474 0.0 1.22474
|
||||
```
|
||||
"""
|
||||
function normalise(x::AbstractArray; dims=1)
|
||||
μ′ = mean(x, dims = dims)
|
||||
|
@@ -87,6 +99,7 @@ end

"""
    kldivergence(ŷ, y)

KL divergence is a measure of how much one probability distribution differs from another.
It is always non-negative, and zero only when both distributions are equal everywhere.
[KL Divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence).
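A one-line property check, as a sketch rather than part of the commit: the divergence of a distribution from itself vanishes.

```julia
# Sketch: KL(p ‖ p) = 0 for any probability vector p.
using Flux: kldivergence

p = [0.1, 0.2, 0.7]
isapprox(kldivergence(p, p), 0, atol=1e-12)  # expected: true
```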
@@ -99,6 +112,7 @@ end

"""
    poisson(ŷ, y)

The Poisson loss function measures how the predicted distribution diverges from the expected distribution.
[Poisson Loss](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
"""
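Given the definition `sum(ŷ .- y .* log.(ŷ)) * 1 // size(y, 2)` (visible in the hunk header below), each term `ŷ - y*log(ŷ)` is minimised at `ŷ = y`; a quick sketch (not from the diff):

```julia
# Sketch: the Poisson loss is lowest when the prediction matches the target.
using Flux: poisson

y = [1.0, 2.0, 3.0]
poisson(y, y) < poisson([2.0, 2.0, 2.0], y)  # expected: true
```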
@@ -106,7 +120,8 @@ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2)

"""
    hinge(ŷ, y)

-Measures the loss given the prediction ŷ and true labels y(containing 1 or -1).
+Measures the loss given the prediction `ŷ` and true labels `y` (containing 1 or -1).
[Hinge Loss](https://en.wikipedia.org/wiki/Hinge_loss).
"""
hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) *1 // size(y,2)
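From the definition, the loss is zero exactly when every prediction has the right sign and a margin of at least 1; a sketch (values worked out from the formula above):

```julia
# Sketch: hinge loss on ±1 labels; for vectors, size(y, 2) == 1.
using Flux: hinge

y = [1, -1, 1]
hinge([2.0, -3.0, 1.5], y)  # 0.0 (every margin ŷ*y ≥ 1)
hinge([0.5, -3.0, 1.5], y)  # 0.5 (the first sample violates the margin)
```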
@@ -125,6 +125,4 @@ onecold(y::AbstractMatrix, labels...) =

onecold(y::OneHotMatrix, labels...) =
  mapreduce(x -> Flux.onecold(x, labels...), |, y.data, dims = 2, init = 0)

-# TODO probably still want this as a custom adjoint Zygote
-# onecold(x::TrackedVector, l...) = onecold(data(x), l...)
-# onecold(x::TrackedMatrix, l...) = onecold(data(x), l...)
@nograd onecold, onehot, onehotbatch
@@ -1,6 +1,6 @@
module Optimise

-export train!,
+export train!, update!,
  SGD, Descent, ADAM, Momentum, Nesterov, RMSProp,
  ADAGrad, AdaMax, ADADelta, AMSGrad, NADAM, ADAMW,RADAM,
  InvDecay, ExpDecay, WeightDecay, stop, Optimiser
@@ -1,5 +1,4 @@
using Flux
-using Base: @get!
using MacroTools: @forward

const ϵ = 1e-8

@@ -7,7 +6,7 @@ const ϵ = 1e-8
# TODO: should use weak refs

"""
    Descent(η)

Classic gradient descent optimiser with learning rate `η`.
For each parameter `p` and its gradient `δp`, this runs `p -= η*δp`.
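As a usage sketch (assuming the `update!` method this commit adds in `train.jl` below):

```julia
# Sketch: one explicit Descent step on a raw parameter array.
using Flux.Optimise: Descent, update!

W  = rand(2, 2)
gW = ones(2, 2)
update!(Descent(0.1), W, gW)  # in place: W .-= 0.1 .* gW
```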
@@ -78,7 +77,7 @@ Gradient descent with learning rate `η` and Nesterov momentum `ρ`.

## Parameters
- Learning Rate (η): Amount by which the gradients are discounted before updating the weights. Defaults to `0.001`.
-- Nesterov Momentum (ρ): Paramters controlling the amount of nesterov momentum to be applied. Defaults to `0.9`.
+- Nesterov Momentum (ρ): Parameter controlling the amount of Nesterov momentum to be applied. Defaults to `0.9`.

## Examples
```julia
@@ -106,7 +105,7 @@ end
"""
    RMSProp(η, ρ)

-Implements the RMSProp algortihm. Often a good choice for recurrent networks. Paramters other than learning rate generally don't need tuning.
+Implements the RMSProp algorithm. Often a good choice for recurrent networks. Parameters other than learning rate generally don't need tuning.

## Parameters
- Learning Rate (η): Defaults to `0.001`.
@@ -442,17 +441,16 @@ function apply!(o::Optimiser, x, Δ)
end

"""
    InvDecay(γ)

Applies inverse time decay to an optimiser, i.e., the effective step size at iteration `n` is `eta / (1 + γ * n)` where `eta` is the initial step size. The wrapped optimiser's step size is not modified.
-```

## Parameters
- gamma (γ): Defaults to `0.001`.

## Example
```julia
Optimiser(InvDecay(..), Opt(..))
```
"""
mutable struct InvDecay
@@ -471,7 +469,7 @@ function apply!(o::InvDecay, x, Δ)
end

"""
    ExpDecay(eta, decay, decay_step, clip)

Discount the learning rate `eta` by a multiplicative factor `decay` every `decay_step` steps till a minimum of `clip`.

@@ -484,9 +482,8 @@ Discount the learning rate `eta` by a multiplicative factor `decay` every `decay
## Example
To apply exponential decay to an optimiser:
```julia
Optimiser(ExpDecay(..), Opt(..))
opt = Optimiser(ExpDecay(), ADAM())
```
"""
mutable struct ExpDecay
@@ -510,7 +507,7 @@ function apply!(o::ExpDecay, x, Δ)
end

"""
    WeightDecay(wd)

Decays the weight by `wd`.
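As with the decay wrappers above, `WeightDecay` composes through `Optimiser`; a hedged usage sketch (the decay constant is illustrative):

```julia
# Sketch: L2-style weight decay of 1e-4 combined with ADAM.
using Flux.Optimise: Optimiser, WeightDecay, ADAM

opt = Optimiser(WeightDecay(1e-4), ADAM())
```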
@@ -1,9 +1,22 @@
using Juno
import Zygote: Params, gradient

+"""
+    update!(opt, p, g)
+    update!(opt, ps::Params, gs)
+
+Perform an update step of the parameters `ps` (or the single parameter `p`)
+according to optimizer `opt` and the gradients `gs` (the gradient `g`).
+
+As a result, the parameters are mutated and the optimizer's internal state may change.
+
+    update!(x, x̄)
+
+Update the array `x` according to `x .-= x̄`.
+"""
function update!(x::AbstractArray, x̄)
-  x .+= x̄
-  return x
+  x .-= x̄
end

function update!(opt, x, x̄)
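A sketch of the `Params` method in use (the model, data, and loss here are illustrative, not from the diff):

```julia
# Sketch: take gradients with Zygote's implicit parameters, then step once.
using Flux
using Flux.Optimise: ADAM, update!

m = Dense(10, 5)
x, y = rand(Float32, 10), rand(Float32, 5)
θ = Flux.params(m)
gs = gradient(() -> Flux.mse(m(x), y), θ)
update!(ADAM(), θ, gs)  # mutates the weights of m in place
```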
@@ -48,13 +61,14 @@ end
For each datapoint `d` in `data` computes the gradient of `loss(d...)` through
backpropagation and calls the optimizer `opt`.

+In case datapoints `d` are of numeric array type, assumes no splatting is needed
+and computes the gradient of `loss(d)`.
+
Takes a callback as keyword argument `cb`. For example, this will print "training"
every 10 seconds:

-```julia
-Flux.train!(loss, params, data, opt,
-            cb = throttle(() -> println("training"), 10))
-```
+    train!(loss, params, data, opt,
+           cb = throttle(() -> println("training"), 10))

The callback can call `Flux.stop()` to interrupt the training loop.
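The no-splat branch added below is what lets a stream of plain arrays feed `train!` directly; a sketch under assumed names (each step pulls `θ` toward the data, here zero):

```julia
# Sketch: each datapoint is a plain numeric array, so train! calls loss(d).
using Flux

θ = ones(2)
loss(x) = sum((x .- θ).^2)
data = [zeros(2) for _ in 1:10]  # every d isa AbstractArray{<:Number}
Flux.train!(loss, Flux.params(θ), data, Descent(0.1))
```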
@@ -65,8 +79,14 @@ function train!(loss, ps, data, opt; cb = () -> ())
  cb = runall(cb)
  @progress for d in data
    try
-      gs = gradient(ps) do
-        loss(d...)
-      end
+      if d isa AbstractArray{<:Number}
+        gs = gradient(ps) do
+          loss(d)
+        end
+      else
+        gs = gradient(ps) do
+          loss(d...)
+        end
+      end
      update!(opt, ps, gs)
      cb()
@@ -60,7 +60,7 @@ head(x::Tuple) = reverse(Base.tail(reverse(x)))
squeezebatch(x) = reshape(x, head(size(x)))

"""
    batch(xs)

Batch the arrays in `xs` into a single array.
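A REPL sketch of what `batch` does (behaviour as I understand it; the example is not shown in the diff):

```julia
# Sketch: batch stacks equally-sized arrays along a new trailing dimension.
using Flux: batch

batch([[1, 2, 3], [4, 5, 6]])
# 3×2 Array{Int64,2}:
#  1  4
#  2  5
#  3  6
```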
@@ -58,6 +58,13 @@ end
  @test y[3,:] isa CuArray
end

+@testset "restructure gpu" begin
+  dudt = Dense(1,1) |> gpu
+  p,re = Flux.destructure(dudt)
+  foo(x) = sum(re(p)(x))
+  @test gradient(foo, cu(rand(1)))[1] isa CuArray
+end
+
if CuArrays.has_cudnn()
  @info "Testing Flux/CUDNN"
  include("cudnn.jl")
test/data.jl
@@ -1,22 +1,85 @@
using Flux.Data
using Test

-@test cmudict()["CATASTROPHE"] == :[K,AH0,T,AE1,S,T,R,AH0,F,IY0].args
-
-@test length(CMUDict.phones()) == 39
-
-@test length(CMUDict.symbols()) == 84
-
-@test MNIST.images()[1] isa Matrix
-@test MNIST.labels() isa Vector{Int64}
-
-@test FashionMNIST.images()[1] isa Matrix
-@test FashionMNIST.labels() isa Vector{Int64}
-
-@test Data.Sentiment.train() isa Vector{Data.Tree{Any}}
-
-@test Iris.features() isa Matrix
-@test size(Iris.features()) == (4,150)
-
-@test Iris.labels() isa Vector{String}
-@test size(Iris.labels()) == (150,)
+@testset "DataLoader" begin
+  X = reshape([1:10;], (2, 5))
+  Y = [1:5;]
+
+  d = DataLoader(X, batchsize=2)
+  batches = collect(d)
+  @test length(batches) == 3
+  @test batches[1] == X[:,1:2]
+  @test batches[2] == X[:,3:4]
+  @test batches[3] == X[:,5:5]
+
+  d = DataLoader(X, batchsize=2, partial=false)
+  batches = collect(d)
+  @test length(batches) == 2
+  @test batches[1] == X[:,1:2]
+  @test batches[2] == X[:,3:4]
+
+  d = DataLoader(X, Y, batchsize=2)
+  batches = collect(d)
+  @test length(batches) == 3
+  @test length(batches[1]) == 2
+  @test length(batches[2]) == 2
+  @test length(batches[3]) == 2
+  @test batches[1][1] == X[:,1:2]
+  @test batches[1][2] == Y[1:2]
+  @test batches[2][1] == X[:,3:4]
+  @test batches[2][2] == Y[3:4]
+  @test batches[3][1] == X[:,5:5]
+  @test batches[3][2] == Y[5:5]
+
+  # test interaction with `train!`
+  θ = ones(2)
+  X = zeros(2, 10)
+  loss(x) = sum((x .- θ).^2)
+  d = DataLoader(X)
+  Flux.train!(loss, [θ], ncycle(d, 10), Descent(0.1))
+  @test norm(θ) < 1e-4
+
+  # test interaction with `train!`
+  θ = zeros(2)
+  X = ones(2, 10)
+  Y = fill(2, 10)
+  loss(x, y) = sum((y - x'*θ).^2)
+  d = DataLoader(X, Y)
+  Flux.train!(loss, [θ], ncycle(d, 10), Descent(0.1))
+  @test norm(θ .- 1) < 1e-10
+end
+
+@testset "CMUDict" begin
+  @test cmudict()["CATASTROPHE"] == :[K,AH0,T,AE1,S,T,R,AH0,F,IY0].args
+
+  @test length(CMUDict.phones()) == 39
+
+  @test length(CMUDict.symbols()) == 84
+end
+
+@testset "MNIST" begin
+  @test MNIST.images()[1] isa Matrix
+  @test MNIST.labels() isa Vector{Int64}
+end
+
+@testset "FashionMNIST" begin
+  @test FashionMNIST.images()[1] isa Matrix
+  @test FashionMNIST.labels() isa Vector{Int64}
+end
+
+@testset "Sentiment" begin
+  @test Data.Sentiment.train() isa Vector{Data.Tree{Any}}
+end
+
+@testset "Iris" begin
+  @test Iris.features() isa Matrix
+  @test size(Iris.features()) == (4,150)
+
+  @test Iris.labels() isa Vector{String}
+  @test size(Iris.labels()) == (150,)
+end
+
+@testset "Housing" begin
+  @test Housing.features() isa Matrix
+  @test size(Housing.features()) == (506, 13)
+
+  @test Housing.targets() isa Array{Float64}
+  @test size(Housing.targets()) == (506, 1)
+end
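These tests double as documentation for the new `DataLoader`; a condensed usage sketch (shapes and keyword defaults assumed from the tests above):

```julia
# Sketch: minibatch iteration, observations stored as columns of X.
using Flux.Data: DataLoader

X, Y = rand(4, 100), rand(100)
for (x, y) in DataLoader(X, Y, batchsize=16, shuffle=true)
  # x is 4×16 and y has length 16 (the last batch may be smaller)
end
```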
@@ -92,4 +92,19 @@ import Flux: activations
    @test size(SkipConnection(Dense(10,10), (a,b) -> cat(a, b, dims = 2))(input)) == (10,4)
  end
+
+  @testset "output dimensions" begin
+    m = Chain(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32))
+    @test Flux.outdims(m, (10, 10)) == (6, 6)
+
+    m = Dense(10, 5)
+    @test Flux.outdims(m, (5, 2)) == (5,)
+    @test Flux.outdims(m, (10,)) == (5,)
+
+    m = Flux.Diagonal(10)
+    @test Flux.outdims(m, (10,)) == (10,)
+
+    m = Maxout(() -> Conv((3, 3), 3 => 16), 2)
+    @test Flux.outdims(m, (10, 10)) == (8, 8)
+  end
end
@@ -107,3 +107,55 @@ end
    true
  end
end
+
+@testset "conv output dimensions" begin
+  m = Conv((3, 3), 3 => 16)
+  @test Flux.outdims(m, (10, 10)) == (8, 8)
+  m = Conv((3, 3), 3 => 16; stride = 2)
+  @test Flux.outdims(m, (5, 5)) == (2, 2)
+  m = Conv((3, 3), 3 => 16; stride = 2, pad = 3)
+  @test Flux.outdims(m, (5, 5)) == (5, 5)
+  m = Conv((3, 3), 3 => 16; stride = 2, pad = 3, dilation = 2)
+  @test Flux.outdims(m, (5, 5)) == (4, 4)
+
+  m = ConvTranspose((3, 3), 3 => 16)
+  @test Flux.outdims(m, (8, 8)) == (10, 10)
+  m = ConvTranspose((3, 3), 3 => 16; stride = 2)
+  @test Flux.outdims(m, (2, 2)) == (5, 5)
+  m = ConvTranspose((3, 3), 3 => 16; stride = 2, pad = 3)
+  @test Flux.outdims(m, (5, 5)) == (5, 5)
+  m = ConvTranspose((3, 3), 3 => 16; stride = 2, pad = 3, dilation = 2)
+  @test Flux.outdims(m, (4, 4)) == (5, 5)
+
+  m = DepthwiseConv((3, 3), 3 => 6)
+  @test Flux.outdims(m, (10, 10)) == (8, 8)
+  m = DepthwiseConv((3, 3), 3 => 6; stride = 2)
+  @test Flux.outdims(m, (5, 5)) == (2, 2)
+  m = DepthwiseConv((3, 3), 3 => 6; stride = 2, pad = 3)
+  @test Flux.outdims(m, (5, 5)) == (5, 5)
+  m = DepthwiseConv((3, 3), 3 => 6; stride = 2, pad = 3, dilation = 2)
+  @test Flux.outdims(m, (5, 5)) == (4, 4)
+
+  m = CrossCor((3, 3), 3 => 16)
+  @test Flux.outdims(m, (10, 10)) == (8, 8)
+  m = CrossCor((3, 3), 3 => 16; stride = 2)
+  @test Flux.outdims(m, (5, 5)) == (2, 2)
+  m = CrossCor((3, 3), 3 => 16; stride = 2, pad = 3)
+  @test Flux.outdims(m, (5, 5)) == (5, 5)
+  m = CrossCor((3, 3), 3 => 16; stride = 2, pad = 3, dilation = 2)
+  @test Flux.outdims(m, (5, 5)) == (4, 4)
+
+  m = MaxPool((2, 2))
+  @test Flux.outdims(m, (10, 10)) == (5, 5)
+  m = MaxPool((2, 2); stride = 1)
+  @test Flux.outdims(m, (5, 5)) == (4, 4)
+  m = MaxPool((2, 2); stride = 2, pad = 3)
+  @test Flux.outdims(m, (5, 5)) == (5, 5)
+
+  m = MeanPool((2, 2))
+  @test Flux.outdims(m, (10, 10)) == (5, 5)
+  m = MeanPool((2, 2); stride = 1)
+  @test Flux.outdims(m, (5, 5)) == (4, 4)
+  m = MeanPool((2, 2); stride = 2, pad = 3)
+  @test Flux.outdims(m, (5, 5)) == (5, 5)
+end
@@ -1,30 +1,32 @@
using Flux, Test, Statistics
using Zygote: pullback

-trainmode(f, x...) = pullback(f, x...)[1]
-trainmode(f) = (x...) -> trainmode(f, x...)
+evalwgrad(f, x...) = pullback(f, x...)[1]

@testset "Dropout" begin
  x = [1.,2.,3.]
  @test x == Dropout(0.1)(x)
-  @test x == trainmode(Dropout(0), x)
-  @test zero(x) == trainmode(Dropout(1), x)
+  @test x == evalwgrad(Dropout(0), x)
+  @test zero(x) == evalwgrad(Dropout(1), x)

  x = rand(100)
  m = Dropout(0.9)
-  y = trainmode(m, x)
+  y = evalwgrad(m, x)
  @test count(a->a==0, y) > 50
-  y = m(x)
+  testmode!(m, true)
+  y = evalwgrad(m, x) # should override istraining
  @test count(a->a==0, y) == 0
-  y = trainmode(m, x)
+  testmode!(m, false)
+  y = evalwgrad(m, x)
  @test count(a->a==0, y) > 50

  x = rand(Float32, 100)
  m = Chain(Dense(100,100),
            Dropout(0.9))
-  y = trainmode(m, x)
+  y = evalwgrad(m, x)
  @test count(a->a == 0, y) > 50
-  y = m(x)
+  testmode!(m, true)
+  y = evalwgrad(m, x) # should override istraining
  @test count(a->a == 0, y) == 0

  x = rand(100, 50)
@@ -49,7 +51,7 @@ end
  # initial m.σ is 1
  # initial m.μ is 0

-  y = trainmode(m, x)
+  y = evalwgrad(m, x)
  @test isapprox(y, [-1.22474 0 1.22474; -1.22474 0 1.22474], atol = 1.0e-5)
  # julia> x
  # 2×3 Array{Float64,2}:

@@ -82,19 +84,19 @@ end
  @test isapprox(y, sigmoid.((x .- m.μ) ./ sqrt.(m.σ² .+ m.ϵ)), atol = 1.0e-7)
end

-let m = trainmode(BatchNorm(2)), x = reshape(Float32.(1:6), 3, 2, 1)
+let m = trainmode!(BatchNorm(2)), x = reshape(Float32.(1:6), 3, 2, 1)
  y = reshape(permutedims(x, [2, 1, 3]), 2, :)
  y = permutedims(reshape(m(y), 2, 3, 1), [2, 1, 3])
  @test m(x) == y
end

-let m = trainmode(BatchNorm(2)), x = reshape(Float32.(1:12), 2, 3, 2, 1)
+let m = trainmode!(BatchNorm(2)), x = reshape(Float32.(1:12), 2, 3, 2, 1)
  y = reshape(permutedims(x, [3, 1, 2, 4]), 2, :)
  y = permutedims(reshape(m(y), 2, 2, 3, 1), [2, 3, 1, 4])
  @test m(x) == y
end

-let m = trainmode(BatchNorm(2)), x = reshape(Float32.(1:24), 2, 2, 3, 2, 1)
+let m = trainmode!(BatchNorm(2)), x = reshape(Float32.(1:24), 2, 2, 3, 2, 1)
  y = reshape(permutedims(x, [4, 1, 2, 3, 5]), 2, :)
  y = permutedims(reshape(m(y), 2, 2, 2, 3, 1), [2, 3, 4, 1, 5])
  @test m(x) == y
@@ -117,7 +119,7 @@ end
  x = Float64.(x)
  @test m.β == [0, 0] # initβ(2)
  @test m.γ == [1, 1] # initγ(2)
-  y = trainmode(m, x)
+  y = evalwgrad(m, x)

  #julia> x
  #[:, :, 1] =

@@ -162,7 +164,7 @@ end
  @test isapprox(y, sigmoid.((x .- expand_inst(m.μ, affine_shape)) ./ sqrt.(expand_inst(m.σ², affine_shape) .+ m.ϵ)), atol = 1.0e-7)
end

-let m = trainmode(InstanceNorm(2)), sizes = (2, 4, 1, 2, 3),
+let m = trainmode!(InstanceNorm(2)), sizes = (2, 4, 1, 2, 3),
  x = Float32.(reshape(collect(1:prod(sizes)), sizes))
  y = reshape(permutedims(x, [3, 1, 2, 4, 5]), :, 2, 3)
  y = reshape(m(y), sizes...)

@@ -172,14 +174,14 @@ end
# check that μ, σ², and the output are the correct size for higher rank tensors
let m = InstanceNorm(2), sizes = (5, 5, 3, 4, 2, 6),
  x = reshape(Float32.(collect(1:prod(sizes))), sizes)
-  y = trainmode(m, x)
+  y = evalwgrad(m, x)
  @test size(m.μ) == (sizes[end - 1], )
  @test size(m.σ²) == (sizes[end - 1], )
  @test size(y) == sizes
end

# show that instance norm is equal to batch norm when channel and batch dims are squashed
-let m_inorm = trainmode(InstanceNorm(2)), m_bnorm = trainmode(BatchNorm(12)), sizes = (5, 5, 3, 4, 2, 6),
+let m_inorm = trainmode!(InstanceNorm(2)), m_bnorm = trainmode!(BatchNorm(12)), sizes = (5, 5, 3, 4, 2, 6),
  x = reshape(Float32.(collect(1:prod(sizes))), sizes)
  @test m_inorm(x) == reshape(m_bnorm(reshape(x, (sizes[1:end - 2]..., :, 1))), sizes)
end
@@ -204,7 +206,7 @@ if VERSION >= v"1.1"
  @test m.β == [0, 0, 0, 0] # initβ(32)
  @test m.γ == [1, 1, 1, 1] # initγ(32)

-  y = trainmode(m, x)
+  y = evalwgrad(m, x)

  #julia> x
  #[:, :, 1] =

@@ -263,7 +265,7 @@ if VERSION >= v"1.1"
  @test isapprox(y, out, atol = 1.0e-7)
end

-let m = trainmode(GroupNorm(2,2)), sizes = (2, 4, 1, 2, 3),
+let m = trainmode!(GroupNorm(2,2)), sizes = (2, 4, 1, 2, 3),
  x = Float32.(reshape(collect(1:prod(sizes)), sizes))
  y = reshape(permutedims(x, [3, 1, 2, 4, 5]), :, 2, 3)
  y = reshape(m(y), sizes...)

@@ -273,20 +275,20 @@ if VERSION >= v"1.1"
# check that μ, σ², and the output are the correct size for higher rank tensors
let m = GroupNorm(4,2), sizes = (5, 5, 3, 4, 4, 6),
  x = Float32.(reshape(collect(1:prod(sizes)), sizes))
-  y = trainmode(m, x)
+  y = evalwgrad(m, x)
  @test size(m.μ) == (m.G,1)
  @test size(m.σ²) == (m.G,1)
  @test size(y) == sizes
end

# show that group norm is the same as instance norm when the group size is the same as the number of channels
-let IN = trainmode(InstanceNorm(4)), GN = trainmode(GroupNorm(4,4)), sizes = (2,2,3,4,5),
+let IN = trainmode!(InstanceNorm(4)), GN = trainmode!(GroupNorm(4,4)), sizes = (2,2,3,4,5),
  x = Float32.(reshape(collect(1:prod(sizes)), sizes))
  @test IN(x) ≈ GN(x)
end

# show that group norm is the same as batch norm for a group of size 1 and batch of size 1
-let BN = trainmode(BatchNorm(4)), GN = trainmode(GroupNorm(4,4)), sizes = (2,2,3,4,1),
+let BN = trainmode!(BatchNorm(4)), GN = trainmode!(GroupNorm(4,4)), sizes = (2,2,3,4,1),
  x = Float32.(reshape(collect(1:prod(sizes)), sizes))
  @test BN(x) ≈ GN(x)
end
@@ -1,32 +1,49 @@
-using Flux, Test, Random, Statistics, Documenter
-using Random
+using Flux
+using Flux.Data
+using Test
+using Random, Statistics, LinearAlgebra
+using Documenter
+using IterTools: ncycle

Random.seed!(0)

@testset "Flux" begin

@info "Testing Basics"

-include("utils.jl")
-include("onehot.jl")
-include("optimise.jl")
-include("data.jl")
+@testset "Utils" begin
+  include("utils.jl")
+end
+
+@testset "Onehot" begin
+  include("onehot.jl")
+end
+
+@testset "Optimise" begin
+  include("optimise.jl")
+end
+
+@testset "Data" begin
+  include("data.jl")
+end

@info "Testing Layers"

-include("layers/basic.jl")
-include("layers/normalisation.jl")
-include("layers/stateless.jl")
-include("layers/conv.jl")
+@testset "Layers" begin
+  include("layers/basic.jl")
+  include("layers/normalisation.jl")
+  include("layers/stateless.jl")
+  include("layers/conv.jl")
+end

-if Flux.use_cuda[]
-  include("cuda/cuda.jl")
-else
-  @warn "CUDA unavailable, not testing GPU support"
-end
+@testset "CUDA" begin
+  if Flux.use_cuda[]
+    include("cuda/cuda.jl")
+  else
+    @warn "CUDA unavailable, not testing GPU support"
+  end
+end

-if VERSION >= v"1.2"
-  doctest(Flux)
-end
+@testset "Docs" begin
+  if VERSION >= v"1.2"
+    doctest(Flux)
+  end
+end

-end
+end # testset Flux