diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE-CC-BY-4.0 b/LICENSE-CC-BY-4.0 new file mode 100644 index 0000000000000000000000000000000000000000..2f244ac814036ecd9ba9f69782e89ce6b1dca9eb --- /dev/null +++ b/LICENSE-CC-BY-4.0 @@ -0,0 +1,395 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. 
More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. 
Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. 
In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. 
Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000000000000000000000000000000000000..2bf1581a7801d6b186a293f3afb1702624987a97 --- /dev/null +++ b/NOTICE @@ -0,0 +1,2 @@ +MindSpore Book +Copyright 2019-2020 Huawei Technologies Co., Ltd diff --git a/README.en.md b/README.en.md deleted file mode 100644 index a03c4e6fa1e8d7484e003afedb87e55b63f4c548..0000000000000000000000000000000000000000 --- a/README.en.md +++ /dev/null @@ -1,36 +0,0 @@ -# book - -#### Description -The code repository stores the complete practice code in 'Introduction DeepLearning with MindSpore'. - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx - -#### Contribution - -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request - - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.md b/README.md index 7804f340785ee0b605cac935b2e820fcaff5cb37..a16321197743beca84c64c6f1d5c0a3f58e39036 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,8 @@ -# book +# MindSpore Book -#### 介绍 The code repository stores the complete practice code in 'Introduction DeepLearning with MindSpore'. -#### 软件架构 -软件架构说明 +## License - -#### 安装教程 - -1. xxxx -2. xxxx -3. xxxx - -#### 使用说明 - -1. xxxx -2. xxxx -3. xxxx - -#### 参与贡献 - -1. Fork 本仓库 -2. 新建 Feat_xxx 分支 -3. 提交代码 -4. 新建 Pull Request - - -#### 码云特技 - -1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目 -5. 码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. 
码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
+- [Apache License 2.0](LICENSE)
+- [Creative Commons Attribution 4.0 International (CC BY 4.0)](LICENSE-CC-BY-4.0)
diff --git a/chapter03/.gitkeep b/chapter03/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter03/lenet/config.py b/chapter03/lenet/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..82494835a66bbe07d7f1b7b7ff636fc728ba7b88
--- /dev/null
+++ b/chapter03/lenet/config.py
@@ -0,0 +1,31 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+Network configuration settings, used in main.py.
+"""
+from easydict import EasyDict as edict
+
+mnist_cfg = edict({
+    'num_classes': 10,
+    'lr': 0.01,
+    'momentum': 0.9,
+    'epoch_size': 1,
+    'batch_size': 32,
+    'buffer_size': 1000,
+    'image_height': 32,
+    'image_width': 32,
+    'save_checkpoint_steps': 1875,
+    'keep_checkpoint_max': 10,
+})
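+
+# Note: main.py reads these settings via attribute access (e.g. cfg.lr,
+# cfg.batch_size); edict simply exposes the dict keys as attributes.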
diff --git a/chapter03/lenet/lenet.py b/chapter03/lenet/lenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..deef8729ac888bca34d6de5d0cf6b9a284ce0a25
--- /dev/null
+++ b/chapter03/lenet/lenet.py
@@ -0,0 +1,77 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""LeNet."""
+import mindspore.nn as nn
+from mindspore.common.initializer import TruncatedNormal
+
+
+def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
+    """Weight initialization for the conv layer."""
+    weight = weight_variable()
+    return nn.Conv2d(in_channels, out_channels,
+                     kernel_size=kernel_size, stride=stride, padding=padding,
+                     weight_init=weight, has_bias=False, pad_mode="valid")
+
+def fc_with_initialize(input_channels, out_channels):
+    """Weight initialization for the fully connected layer."""
+    weight = weight_variable()
+    bias = weight_variable()
+    return nn.Dense(input_channels, out_channels, weight, bias)
+
+def weight_variable():
+    """Truncated-normal weight initializer with stddev 0.02."""
+    return TruncatedNormal(0.02)
+
+class LeNet5(nn.Cell):
+    """
+    LeNet network.
+
+    Args:
+        num_class (int): Number of classes. Default: 10.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> LeNet5(num_class=10)
+    """
+    def __init__(self, num_class=10):
+        super(LeNet5, self).__init__()
+        self.num_class = num_class
+        self.batch_size = 32
+        self.conv1 = conv(1, 6, 5)
+        self.conv2 = conv(6, 16, 5)
+        self.fc1 = fc_with_initialize(16 * 5 * 5, 120)
+        self.fc2 = fc_with_initialize(120, 84)
+        self.fc3 = fc_with_initialize(84, self.num_class)
+        self.relu = nn.ReLU()
+        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.flatten = nn.Flatten()
+
+    def construct(self, x):
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.flatten(x)
+        x = self.fc1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        x = self.relu(x)
+        x = self.fc3(x)
+        return x
diff --git a/chapter03/lenet/main.py b/chapter03/lenet/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..e39a37d3979111520c672131a82a463c1e2ccae5
--- /dev/null
+++ b/chapter03/lenet/main.py
@@ -0,0 +1,116 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+######################## train and test lenet example ########################
+1. Train LeNet and get network model files (.ckpt):
+python main.py --data_path /home/workspace/mindspore_dataset/Tutorial_Network/Lenet/MNIST_Data
+
+2. Test LeNet from a trained model file:
+python main.py --data_path /home/workspace/mindspore_dataset/Tutorial_Network/Lenet/MNIST_Data
+    --mode test --ckpt_path checkpoint_lenet_1-1_1875.ckpt
+"""
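+# Note: the train/test branches below call create_dataset(os.path.join(args.data_path, <mode>)),
+# so the MNIST_Data directory is expected to contain `train` and `test` subfolders.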
+import os
+import argparse
+from config import mnist_cfg as cfg
+from lenet import LeNet5
+import mindspore.dataset as ds
+import mindspore.nn as nn
+from mindspore import context
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
+from mindspore.train import Model
+import mindspore.dataset.transforms.vision.c_transforms as CV
+import mindspore.dataset.transforms.c_transforms as C
+from mindspore.dataset.transforms.vision import Inter
+from mindspore.nn.metrics import Accuracy
+from mindspore.common import dtype as mstype
+
+
+def create_dataset(data_path, batch_size=32, repeat_size=1,
+                   num_parallel_workers=1):
+    """
+    Create dataset for train or test.
+    """
+    # define dataset
+    mnist_ds = ds.MnistDataset(data_path)
+
+    resize_height, resize_width = 32, 32
+    rescale = 1.0 / 255.0
+    shift = 0.0
+    # normalize with the MNIST mean (0.1307) and std (0.3081)
+    rescale_nml = 1 / 0.3081
+    shift_nml = -1 * 0.1307 / 0.3081
+
+    # define map operations
+    resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)  # Bilinear mode
+    rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
+    rescale_op = CV.Rescale(rescale, shift)
+    hwc2chw_op = CV.HWC2CHW()
+    type_cast_op = C.TypeCast(mstype.int32)
+
+    # apply map operations on images
+    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+
+    # apply DatasetOps
+    buffer_size = 10000
+    mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)  # 10000 as in LeNet train script
+    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
+    mnist_ds = mnist_ds.repeat(repeat_size)
+
+    return mnist_ds
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='MindSpore MNIST Example')
+    parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'],
+                        help='device where the code will be implemented (default: Ascend)')
+    parser.add_argument('--mode', type=str, default="train", choices=['train', 'test'],
+                        help='implement phase, set to train or test')
+    parser.add_argument('--data_path', type=str, default="./MNIST_Data",
+                        help='path where the dataset is saved')
+    parser.add_argument('--ckpt_path', type=str, default="",
+                        help='path of the trained ckpt file, required when mode is test')
+    parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True')
+
+    args = parser.parse_args()
+
+    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, enable_mem_reuse=False)
+
+    network = LeNet5(cfg.num_classes)
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    repeat_size = cfg.epoch_size
+    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
+    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
+                                 keep_checkpoint_max=cfg.keep_checkpoint_max)
+    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
+    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
+
+    if args.mode == 'train':
+        ds_train = create_dataset(os.path.join(args.data_path, args.mode), batch_size=cfg.batch_size,
+                                  repeat_size=repeat_size)
+        print("============== Starting Training ==============")
+        model.train(cfg.epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor()],
+                    dataset_sink_mode=args.dataset_sink_mode)
+    elif args.mode == 'test':
+        print("============== Starting Testing ==============")
+        param_dict = load_checkpoint(args.ckpt_path)
+        load_param_into_net(network, param_dict)
+        ds_eval = create_dataset(os.path.join(args.data_path, "test"), 32, 1)
+        acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
+        print("============== Accuracy:{} ==============".format(acc))
+    else:
+        raise RuntimeError('mode should be train or test, rather than {}'.format(args.mode))
diff --git a/chapter04/.gitkeep b/chapter04/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter04/alexnet/alexnet.py b/chapter04/alexnet/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c5e723aae442b3e5bdb7679da8a29d401605750
--- /dev/null
+++ b/chapter04/alexnet/alexnet.py
@@ -0,0 +1,75 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""AlexNet."""
+from config import alexnet_cfg as cfg
+import mindspore.nn as nn
+from mindspore.common.initializer import TruncatedNormal
+
+def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"):
+    """Weight initialization for the conv layer."""
+    weight = weight_variable()
+    return nn.Conv2d(in_channels, out_channels,
+                     kernel_size=kernel_size, stride=stride, padding=padding,
+                     weight_init=weight, has_bias=False, pad_mode=pad_mode)
+
+def fc_with_initialize(input_channels, out_channels):
+    """Weight initialization for the fully connected layer."""
+    weight = weight_variable()
+    bias = weight_variable()
+    return nn.Dense(input_channels, out_channels, weight, bias)
+
+def weight_variable():
+    """Truncated-normal weight initializer with stddev 0.02."""
+    return TruncatedNormal(0.02)
+
+
+class AlexNet(nn.Cell):
+    """
+    AlexNet network.
+
+    Args:
+        num_classes (int): Number of classes. Default: 10.
+    """
+    def __init__(self, num_classes=10):
+        super(AlexNet, self).__init__()
+        self.batch_size = cfg.batch_size
+        self.conv1 = conv(3, 96, 11, stride=4)
+        self.conv2 = conv(96, 256, 5, pad_mode="same")
+        self.conv3 = conv(256, 384, 3, pad_mode="same")
+        self.conv4 = conv(384, 384, 3, pad_mode="same")
+        self.conv5 = conv(384, 256, 3, pad_mode="same")
+        self.relu = nn.ReLU()
+        self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.flatten = nn.Flatten()
+        self.fc1 = fc_with_initialize(6*6*256, 4096)
+        self.fc2 = fc_with_initialize(4096, 4096)
+        self.fc3 = fc_with_initialize(4096, num_classes)
+
+    def construct(self, x):
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.conv3(x)
+        x = self.relu(x)
+        x = self.conv4(x)
+        x = self.relu(x)
+        x = self.conv5(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.flatten(x)
+        x = self.fc1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        x = self.relu(x)
+        x = self.fc3(x)
+        return x
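+
+# Shape check (a sketch; assumes numpy as np and mindspore.Tensor are imported):
+#   net = AlexNet(num_classes=10)
+#   x = Tensor(np.zeros((1, 3, 227, 227)).astype(np.float32))
+#   print(net(x).shape)  # (1, 10); a 227x227 input reaches fc1 as 6*6*256 features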
diff --git a/chapter04/alexnet/config.py b/chapter04/alexnet/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee19e21a380f74da4d8a7460507dffc9be543b6d
--- /dev/null
+++ b/chapter04/alexnet/config.py
@@ -0,0 +1,31 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+Network configuration settings, used in main.py.
+"""
+from easydict import EasyDict as edict
+
+alexnet_cfg = edict({
+    'num_classes': 10,
+    'learning_rate': 0.002,
+    'momentum': 0.9,
+    'epoch_size': 1,
+    'batch_size': 32,
+    'buffer_size': 1000,
+    'image_height': 227,
+    'image_width': 227,
+    'save_checkpoint_steps': 1562,
+    'keep_checkpoint_max': 10,
+})
diff --git a/chapter04/alexnet/main.py b/chapter04/alexnet/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bf940bfcfa95160f3230675ca0d41373f0b7eb2
--- /dev/null
+++ b/chapter04/alexnet/main.py
@@ -0,0 +1,105 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+AlexNet example tutorial
+Usage:
+    python main.py
+With --device_target=GPU: after 20 epochs of training, the accuracy is up to 80%.
+"""
+
+import os
+import argparse
+from config import alexnet_cfg as cfg
+from alexnet import AlexNet
+import mindspore.dataset as ds
+import mindspore.nn as nn
+from mindspore import context
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
+from mindspore.train import Model
+import mindspore.dataset.transforms.c_transforms as C
+import mindspore.dataset.transforms.vision.c_transforms as CV
+from mindspore.nn.metrics import Accuracy
+from mindspore.common import dtype as mstype
+
+
+def create_dataset(data_path, batch_size=32, repeat_size=1):
+    """
+    Create dataset for train or test.
+    """
+    cifar_ds = ds.Cifar10Dataset(data_path)
+    rescale = 1.0 / 255.0
+    shift = 0.0
+
+    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
+    rescale_op = CV.Rescale(rescale, shift)
+    # normalize with the commonly used CIFAR-10 per-channel mean and std (RGB order)
+    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
+    random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
+    random_horizontal_op = CV.RandomHorizontalFlip()
+    channel_swap_op = CV.HWC2CHW()
+    typecast_op = C.TypeCast(mstype.int32)
+    cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
+    cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
+    cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
+    cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
+    cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
+    cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
+    cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)
+
+    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
+    cifar_ds = cifar_ds.repeat(repeat_size)
+    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
+    return cifar_ds
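+
+# Example (a sketch; the path is illustrative): build the training pipeline
+# from a local CIFAR-10 binary folder and inspect its size.
+#   ds_example = create_dataset("./cifar-10-batches-bin", batch_size=32)
+#   print(ds_example.get_dataset_size())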
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
+    parser.add_argument('--mode', type=str, default="train", choices=['train', 'test'],
+                        help='implement phase, set to train or test')
+    parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
+                        help='device where the code will be implemented (default: Ascend)')
+    parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
+    parser.add_argument('--ckpt_path', type=str, default="./ckpt",
+                        help='path of the trained ckpt file, required when mode is test')
+    parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True')
+    args = parser.parse_args()
+
+    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, enable_mem_reuse=False)
+
+    network = AlexNet(cfg.num_classes)
+    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    repeat_size = cfg.epoch_size
+    opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
+    model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
+
+    if args.mode == 'train':
+        print("============== Starting Training ==============")
+        ds_train = create_dataset(args.data_path,
+                                  cfg.batch_size,
+                                  repeat_size)
+        config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
+                                     keep_checkpoint_max=cfg.keep_checkpoint_max)
+        ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet", directory=args.ckpt_path, config=config_ck)
+        model.train(cfg.epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor()],
+                    dataset_sink_mode=args.dataset_sink_mode)
+    elif args.mode == 'test':
+        print("============== Starting Testing ==============")
+        param_dict = load_checkpoint(args.ckpt_path)
+        load_param_into_net(network, param_dict)
+        ds_eval = create_dataset(args.data_path)
+        acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
+        print("============== Accuracy:{} ==============".format(acc))
+    else:
+        raise RuntimeError('mode should be train or test, rather than {}'.format(args.mode))
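+
+# Note: in test mode the evaluation pipeline is built from the same --data_path,
+# so point --data_path at the CIFAR-10 evaluation split when running with --mode test.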
diff --git a/chapter05/.gitkeep b/chapter05/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter05/resnet/resnet.py b/chapter05/resnet/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cd65377122e4a7b30798f58e0f5f2041afca285
--- /dev/null
+++ b/chapter05/resnet/resnet.py
@@ -0,0 +1,382 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ResNet."""
+import mindspore.nn as nn
+from mindspore.ops import operations as P
+from mindspore.common.initializer import TruncatedNormal, Normal
+
+def weight_variable(fan_in):
+    """Truncated-normal weight initializer scaled by fan-in."""
+    stddev = (1.0/fan_in)**0.5
+    return TruncatedNormal(stddev)
+
+def dense_weight_variable():
+    """The weight initializer for the dense layer."""
+    return Normal(0.01)
+
+def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
+    """Get a conv2d layer with 3x3 kernel size."""
+    init_value = weight_variable(in_channels)
+    return nn.Conv2d(in_channels, out_channels,
+                     kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
+
+def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
+    """Get a conv2d layer with 1x1 kernel size."""
+    init_value = weight_variable(in_channels)
+    return nn.Conv2d(in_channels, out_channels,
+                     kernel_size=1, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
+
+def _conv7x7(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
+    """Get a conv2d layer with 7x7 kernel size."""
+    init_value = weight_variable(in_channels)
+    return nn.Conv2d(in_channels, out_channels,
+                     kernel_size=7, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
+
+def _fused_bn(channels, momentum=0.9):
+    """Get a fused batchnorm."""
+    return nn.BatchNorm2d(channels, eps=1e-4, momentum=momentum, gamma_init=1, beta_init=0)
+
+def _fused_bn_last(channels, momentum=0.9):
+    """Get a fused batchnorm with zero-initialized gamma, for the last BN in a block."""
+    return nn.BatchNorm2d(channels, eps=1e-4, momentum=momentum, gamma_init=0, beta_init=0)
+
+class BasicBlock(nn.Cell):
+    """
+    ResNet V1 basic block definition.
+
+    Args:
+        in_channels: Integer. Input channel.
+        out_channels: Integer. Output channel.
+        stride: Integer. Stride size for the initial convolutional layer. Default: 1.
+        momentum: Float. Momentum for the batchnorm layer. Default: 0.9.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        BasicBlock(64, 128, stride=2)
+    """
+    expansion = 1
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride=1,
+                 momentum=0.9):
+        super(BasicBlock, self).__init__()
+
+        self.conv1 = _conv3x3(in_channels, out_channels, stride=stride)
+        self.bn1 = _fused_bn(out_channels, momentum=momentum)
+        self.conv2 = _conv3x3(out_channels, out_channels)
+        self.bn2 = _fused_bn(out_channels, momentum=momentum)
+        self.relu = P.ReLU()
+        self.down_sample_layer = None
+        # the identity branch needs a projection whenever the output shape changes,
+        # i.e. when either the channel count or the spatial stride differs
+        self.downsample = (in_channels != out_channels) or (stride != 1)
+        if self.downsample:
+            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channels,
+                                                                 out_channels,
+                                                                 stride=stride,
+                                                                 padding=0),
+                                                        _fused_bn(out_channels,
+                                                                  momentum=momentum)])
+        self.add = P.TensorAdd()
+
+    def construct(self, x):
+        identity = x
+
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+
+        x = self.conv2(x)
+        x = self.bn2(x)
+
+        if self.downsample:
+            identity = self.down_sample_layer(identity)
+
+        out = self.add(x, identity)
+        out = self.relu(out)
+
+        return out
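+
+# BasicBlock (expansion=1, two 3x3 convs) is used by resnet18/resnet34 below;
+# ResidualBlock (expansion=4, 1x1-3x3-1x1 bottleneck) is used by resnet50/resnet101.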
+
+
+class ResidualBlock(nn.Cell):
+    """
+    ResNet V1 residual block definition.
+
+    Args:
+        in_channels: Integer. Input channel.
+        out_channels: Integer. Output channel.
+        stride: Integer. Stride size for the initial convolutional layer. Default: 1.
+        momentum: Float. Momentum for the batchnorm layer. Default: 0.9.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        ResidualBlock(256, 512, stride=2)
+    """
+    expansion = 4
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride=1,
+                 momentum=0.9):
+        super(ResidualBlock, self).__init__()
+
+        out_chls = out_channels // self.expansion
+        self.conv1 = _conv1x1(in_channels, out_chls, stride=1)
+        self.bn1 = _fused_bn(out_chls, momentum=momentum)
+
+        self.conv2 = _conv3x3(out_chls, out_chls, stride=stride)
+        self.bn2 = _fused_bn(out_chls, momentum=momentum)
+
+        self.conv3 = _conv1x1(out_chls, out_channels, stride=1)
+        self.bn3 = _fused_bn_last(out_channels, momentum=momentum)
+
+        self.relu = P.ReLU()
+        self.downsample = (in_channels != out_channels)
+        self.stride = stride
+        if self.downsample:
+            self.conv_down_sample = _conv1x1(in_channels, out_channels,
+                                             stride=stride)
+            self.bn_down_sample = _fused_bn(out_channels, momentum=momentum)
+        elif self.stride != 1:
+            self.maxpool_down = nn.MaxPool2d(kernel_size=1, stride=2, pad_mode='same')
+
+        self.add = P.TensorAdd()
+
+    def construct(self, x):
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        if self.downsample:
+            identity = self.conv_down_sample(identity)
+            identity = self.bn_down_sample(identity)
+        elif self.stride != 1:
+            identity = self.maxpool_down(identity)
+
+        out = self.add(out, identity)
+        out = self.relu(out)
+
+        return out
+
+
+class ResNet(nn.Cell):
+    """
+    ResNet V1 network.
+
+    Args:
+        block: Cell. Block for the network.
+        layer_nums: List. Numbers of blocks in the four layers.
+        in_channels: List. Input channel of each layer.
+        out_channels: List. Output channel of each layer.
+        strides: Tuple. Stride of each layer. Default: (1, 2, 2, 2).
+        num_classes: Integer. Class number. Default: 100.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        ResNet(ResidualBlock,
+               [3, 4, 6, 3],
+               [64, 256, 512, 1024],
+               [256, 512, 1024, 2048],
+               num_classes=100)
+    """
+
+    def __init__(self,
+                 block,
+                 layer_nums,
+                 in_channels,
+                 out_channels,
+                 strides=(1, 2, 2, 2),
+                 num_classes=100):
+        super(ResNet, self).__init__()
+
+        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
+            raise ValueError("the length of layer_nums, in_channels "
+                             "and out_channels must each be 4!")
+
+        self.conv1 = _conv7x7(3, 64, stride=2)
+        self.bn1 = _fused_bn(64)
+        self.relu = P.ReLU()
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same')
+
+        self.layer1 = self._make_layer(block,
+                                       layer_nums[0],
+                                       in_channel=in_channels[0],
+                                       out_channel=out_channels[0],
+                                       stride=strides[0])
+        self.layer2 = self._make_layer(block,
+                                       layer_nums[1],
+                                       in_channel=in_channels[1],
+                                       out_channel=out_channels[1],
+                                       stride=strides[1])
+        self.layer3 = self._make_layer(block,
+                                       layer_nums[2],
+                                       in_channel=in_channels[2],
+                                       out_channel=out_channels[2],
+                                       stride=strides[2])
+        self.layer4 = self._make_layer(block,
+                                       layer_nums[3],
+                                       in_channel=in_channels[3],
+                                       out_channel=out_channels[3],
+                                       stride=strides[3])
+
+        self.mean = P.ReduceMean(keep_dims=True)
+        self.end_point = nn.Dense(out_channels[3], num_classes, has_bias=True,
+                                  weight_init=dense_weight_variable())
+        self.squeeze = P.Squeeze()
+        self.cast = P.Cast()
+
+    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
+        """
+        Make a layer for ResNet.
+
+        Args:
+            block: Cell. ResNet block.
+            layer_num: Integer. Number of blocks in the layer.
+            in_channel: Integer. Input channel.
+            out_channel: Integer. Output channel.
+            stride: Integer. Stride size for the initial convolutional layer.
+
+        Returns:
+            SequentialCell, the output layer.
+
+        Examples:
+            _make_layer(BasicBlock, 3, 128, 256, 2)
+        """
+        layers = []
+
+        resblk = block(in_channel, out_channel, stride=1)
+        layers.append(resblk)
+
+        for _ in range(1, layer_num - 1):
+            resblk = block(out_channel, out_channel, stride=1)
+            layers.append(resblk)
+
+        # Unlike the common ResNet layout, the spatial stride is applied by
+        # the last block of the stage rather than the first one.
+        resblk = block(out_channel, out_channel, stride=stride)
+        layers.append(resblk)
+
+        return nn.SequentialCell(layers)
+
+    def construct(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        c1 = self.maxpool(x)
+
+        c2 = self.layer1(c1)
+        c3 = self.layer2(c2)
+        c4 = self.layer3(c3)
+        c5 = self.layer4(c4)
+
+        out = self.mean(c5, (2, 3))
+        out = self.squeeze(out)
+        out = self.end_point(out)
+
+        return out
+
+
+def resnet50(class_num=10):
+    """
+    Get ResNet50 neural network.
+
+    Args:
+        class_num: Integer. Class number.
+
+    Returns:
+        Cell, cell instance of ResNet50 neural network.
+
+    Examples:
+        resnet50(100)
+    """
+    return ResNet(ResidualBlock,
+                  [3, 4, 6, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  [2, 2, 2, 1],
+                  class_num)
+
+
+def resnet101(class_num=10):
+    """
+    Get ResNet101 neural network.
+
+    Args:
+        class_num: Integer. Class number.
+
+    Returns:
+        Cell, cell instance of ResNet101 neural network.
+
+    Examples:
+        resnet101(100)
+    """
+    # class_num must be passed as a keyword argument: the positional slot
+    # after out_channels is strides, which keeps its default of (1, 2, 2, 2).
+    return ResNet(ResidualBlock,
+                  [3, 4, 23, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  num_classes=class_num)
+
+
+def resnet34(class_num=10):
+    """
+    Get ResNet34 neural network.
+
+    Args:
+        class_num: Integer. Class number.
+
+    Returns:
+        Cell, cell instance of ResNet34 neural network.
+
+    Examples:
+        resnet34(100)
+    """
+    return ResNet(BasicBlock,
+                  [3, 4, 6, 3],
+                  [64, 64, 128, 256],
+                  [64, 128, 256, 512],
+                  num_classes=class_num)
+
+
+def resnet18(class_num=10):
+    """
+    Get ResNet18 neural network.
+
+    Args:
+        class_num: Integer. Class number.
+
+    Returns:
+        Cell, cell instance of ResNet18 neural network.
+
+    Examples:
+        resnet18(100)
+    """
+    return ResNet(BasicBlock,
+                  [2, 2, 2, 2],
+                  [64, 64, 128, 256],
+                  [64, 128, 256, 512],
+                  num_classes=class_num)
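+
+
+if __name__ == '__main__':
+    # Minimal smoke-test sketch: build ResNet-50 for 10 classes and count its
+    # trainable parameter tensors. Construction is host-side only, so this
+    # does not require an accelerator device.
+    net = resnet50(class_num=10)
+    print('trainable parameter tensors:', len(list(net.trainable_params())))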
+""" +import os +import random +import argparse +import numpy as np +import mindspore.nn as nn +import mindspore.common.dtype as mstype +import mindspore.ops.functional as F +import mindspore.dataset as de +import mindspore.dataset.transforms.vision.c_transforms as C +import mindspore.dataset.transforms.c_transforms as C2 +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.nn.optim.momentum import Momentum +from mindspore.train.model import Model, ParallelMode +from mindspore import context +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.communication.management import init +from mindspore.parallel._auto_parallel_context import auto_parallel_context +from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits +from resnet import resnet50 +random.seed(1) +np.random.seed(1) +de.config.set_seed(1) + +parser = argparse.ArgumentParser(description='Image classification.') +parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'], + help='device where the code will be implemented (default: Ascend)') +parser.add_argument('--run_distribute', type=bool, default=False, help='Run distributei.') +parser.add_argument('--device_num', type=int, default=1, help='Device num.') +parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.') +parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.') +parser.add_argument('--epoch_size', type=int, default=1, help='Epoch size.') +parser.add_argument('--batch_size', type=int, default=32, help='Batch size.') +parser.add_argument('--num_classes', type=int, default=10, help='Num classes.') +parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path.') +parser.add_argument('--dataset_path', type=str, default="./datasets/cifar/cifar-10-batches-bin", + help='Dataset path.') +args_opt = parser.parse_args() + +#The path of the data. +data_home = args_opt.dataset_path + +#Choose the graph_mode as mode, the env is Ascend and save graphs like ir +context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, save_graphs=True) +if args_opt.device_target == "Ascend": + #Choose one availabe Device to use on users' env. 
+def create_dataset(repeat_num=1, training=True):
+    """create the dataset of cifar10"""
+    ds = de.Cifar10Dataset(data_home)
+
+    if args_opt.run_distribute:
+        rank_id = int(os.getenv('RANK_ID'))
+        rank_size = int(os.getenv('RANK_SIZE'))
+        ds = de.Cifar10Dataset(data_home, num_shards=rank_size, shard_id=rank_id)
+
+    resize_height = 224
+    resize_width = 224
+    rescale = 1.0 / 255.0
+    shift = 0.0
+
+    # define map operations
+    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
+    random_horizontal_op = C.RandomHorizontalFlip()
+    resize_op = C.Resize((resize_height, resize_width))  # interpolation default BILINEAR
+    rescale_op = C.Rescale(rescale, shift)
+    normalize_op = C.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
+    changeswap_op = C.HWC2CHW()
+    type_cast_op = C2.TypeCast(mstype.int32)
+
+    c_trans = []
+    if training:
+        c_trans = [random_crop_op, random_horizontal_op]
+    c_trans += [resize_op, rescale_op, normalize_op,
+                changeswap_op]
+
+    # apply map operations on images
+    ds = ds.map(input_columns="label", operations=type_cast_op)
+    ds = ds.map(input_columns="image", operations=c_trans)
+
+    # apply repeat operations
+    ds = ds.repeat(repeat_num)
+
+    # apply shuffle operations
+    ds = ds.shuffle(buffer_size=10)
+
+    # apply batch operations
+    ds = ds.batch(batch_size=args_opt.batch_size, drop_remainder=True)
+
+    return ds
+
+if __name__ == '__main__':
+    if args_opt.do_eval:
+        context.set_context(enable_hccl=False)
+    else:
+        if args_opt.run_distribute:
+            context.set_context(enable_hccl=True)
+            context.set_auto_parallel_context(device_num=args_opt.device_num,
+                                              parallel_mode=ParallelMode.DATA_PARALLEL,
+                                              mirror_mean=True)
+            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+            init()
+        else:
+            context.set_context(enable_hccl=False)
+
+    epoch_size = args_opt.epoch_size
+    net = resnet50(args_opt.num_classes)
+    ls = SoftmaxCrossEntropyWithLogits(sparse=True, is_grad=False, reduction="mean")
+    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
+
+    model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})
+
+    if args_opt.do_train:
+        dataset = create_dataset(epoch_size)
+        batch_num = dataset.get_dataset_size()
+        config_ck = CheckpointConfig(save_checkpoint_steps=batch_num, keep_checkpoint_max=10)
+        ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10", directory="./", config=config_ck)
+        loss_cb = LossMonitor()
+        model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])
+
+    if args_opt.do_eval:
+        if args_opt.checkpoint_path:
+            param_dict = load_checkpoint(args_opt.checkpoint_path)
+            load_param_into_net(net, param_dict)
+        net.set_train(False)
+        eval_dataset = create_dataset(1, training=False)
+        res = model.eval(eval_dataset)
+        print("result: ", res)
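+
+# Usage sketch (the device id and dataset path are environment-specific):
+#
+#     DEVICE_ID=0 python resnet_cifar.py --device_target=Ascend \
+#         --epoch_size=34 --dataset_path=./datasets/cifar/cifar-10-batches-bin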
diff --git a/chapter06/.gitkeep b/chapter06/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter06/lstm/config.py b/chapter06/lstm/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ae2d048be6ab5d9176c70c6210539775ad6507a
--- /dev/null
+++ b/chapter06/lstm/config.py
@@ -0,0 +1,33 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+network config
+"""
+from easydict import EasyDict as edict
+
+# LSTM CONFIG
+lstm_cfg = edict({
+    'num_classes': 2,
+    'learning_rate': 0.1,
+    'momentum': 0.9,
+    'num_epochs': 1,
+    'batch_size': 64,
+    'embed_size': 300,
+    'num_hiddens': 100,
+    'num_layers': 2,
+    'bidirectional': True,
+    'save_checkpoint_steps': 390,
+    'keep_checkpoint_max': 10
+})
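+
+# Access sketch: fields are read attribute-style by the training script, e.g.
+#
+#     from config import lstm_cfg as cfg
+#     print(cfg.embed_size, cfg.num_hiddens)  # 300 100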
diff --git a/chapter06/lstm/main.py b/chapter06/lstm/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4483a514a5b105a8dd53d352ea278a69eadefd8
--- /dev/null
+++ b/chapter06/lstm/main.py
@@ -0,0 +1,340 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+LSTM Tutorial
+"""
+import os
+import shutil
+import math
+import argparse
+import json
+from itertools import chain
+import numpy as np
+from config import lstm_cfg as cfg
+
+import mindspore.nn as nn
+import mindspore.context as context
+import mindspore.dataset as ds
+from mindspore.ops import operations as P
+from mindspore import Tensor
+from mindspore.common.initializer import initializer
+from mindspore.common.parameter import Parameter
+from mindspore.mindrecord import FileWriter
+from mindspore.train import Model
+from mindspore.nn.metrics import Accuracy
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
+# Install gensim with 'pip install gensim'
+import gensim
+
+
+def encode_samples(tokenized_samples, word_to_idx):
+    """Encode words to indices; unknown words map to index 0."""
+    features = []
+    for sample in tokenized_samples:
+        feature = []
+        for token in sample:
+            if token in word_to_idx:
+                feature.append(word_to_idx[token])
+            else:
+                feature.append(0)
+        features.append(feature)
+    return features
+
+def pad_samples(features, maxlen=500, pad=0):
+    """Truncate or pad all features to the same length."""
+    padded_features = []
+    for feature in features:
+        if len(feature) >= maxlen:
+            padded_feature = feature[:maxlen]
+        else:
+            padded_feature = feature
+            while len(padded_feature) < maxlen:
+                padded_feature.append(pad)
+        padded_features.append(padded_feature)
+    return padded_features
+
+def read_imdb(path, seg='train'):
+    """Read the IMDb dataset."""
+    pos_or_neg = ['pos', 'neg']
+    data = []
+    for label in pos_or_neg:
+        files = os.listdir(os.path.join(path, seg, label))
+        for file in files:
+            with open(os.path.join(path, seg, label, file), 'r', encoding='utf8') as rf:
+                review = rf.read().replace('\n', '')
+                if label == 'pos':
+                    data.append([review, 1])
+                elif label == 'neg':
+                    data.append([review, 0])
+    return data
+
+def tokenizer(text):
+    return [tok.lower() for tok in text.split(' ')]
+
+def collect_weight(glove_path, vocab, word_to_idx, embed_size):
+    """Collect pre-trained GloVe weights into an embedding table."""
+    vocab_size = len(vocab)
+    wvmodel = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(glove_path, 'glove.6B.300d.txt'),
+                                                              binary=False, encoding='utf-8')
+    weight_np = np.zeros((vocab_size + 1, embed_size)).astype(np.float32)
+
+    idx_to_word = {i + 1: word for i, word in enumerate(vocab)}
+    idx_to_word[0] = '<unk>'
+
+    for i in range(len(wvmodel.index2word)):
+        try:
+            index = word_to_idx[wvmodel.index2word[i]]
+        except KeyError:
+            continue
+        weight_np[index, :] = wvmodel.get_vector(
+            idx_to_word[word_to_idx[wvmodel.index2word[i]]])
+    return weight_np
+
+def preprocess(aclimdb_path, glove_path, embed_size):
+    """Preprocess the train and test data."""
+    train_data = read_imdb(aclimdb_path, 'train')
+    test_data = read_imdb(aclimdb_path, 'test')
+
+    train_tokenized = []
+    test_tokenized = []
+    for review, _ in train_data:
+        train_tokenized.append(tokenizer(review))
+    for review, _ in test_data:
+        test_tokenized.append(tokenizer(review))
+
+    vocab = set(chain(*train_tokenized))
+    vocab_size = len(vocab)
+    print("vocab_size: ", vocab_size)
+
+    word_to_idx = {word: i + 1 for i, word in enumerate(vocab)}
+    word_to_idx['<unk>'] = 0
+
+    train_features = np.array(pad_samples(encode_samples(train_tokenized, word_to_idx))).astype(np.int32)
+    train_labels = np.array([score for _, score in train_data]).astype(np.int32)
+    test_features = np.array(pad_samples(encode_samples(test_tokenized, word_to_idx))).astype(np.int32)
+    test_labels = np.array([score for _, score in test_data]).astype(np.int32)
+
+    weight_np = collect_weight(glove_path, vocab, word_to_idx, embed_size)
+    return train_features, train_labels, test_features, test_labels, weight_np, vocab_size
+
+def get_imdb_data(labels_data, features_data):
+    data_list = []
+    for i, (label, feature) in enumerate(zip(labels_data, features_data)):
+        data_json = {"id": i,
+                     "label": int(label),
+                     "feature": feature.reshape(-1)}
+        data_list.append(data_json)
+    return data_list
+
+def convert_to_mindrecord(embed_size, aclimdb_path, preprocess_path, glove_path):
+    """Convert the IMDb dataset to MindRecord."""
+    num_shard = 4
+    train_features, train_labels, test_features, test_labels, weight_np, _ = \
+        preprocess(aclimdb_path, glove_path, embed_size)
+    np.savetxt(os.path.join(preprocess_path, 'weight.txt'), weight_np)
+
+    # write mindrecord
+    schema_json = {"id": {"type": "int32"},
+                   "label": {"type": "int32"},
+                   "feature": {"type": "int32", "shape": [-1]}}
+
+    writer = FileWriter(os.path.join(preprocess_path, 'aclImdb_train.mindrecord'), num_shard)
+    data = get_imdb_data(train_labels, train_features)
+    writer.add_schema(schema_json, "nlp_schema")
+    writer.add_index(["id", "label"])
+    writer.write_raw_data(data)
+    writer.commit()
+
+    writer = FileWriter(os.path.join(preprocess_path, 'aclImdb_test.mindrecord'), num_shard)
+    data = get_imdb_data(test_labels, test_features)
+    writer.add_schema(schema_json, "nlp_schema")
+    writer.add_index(["id", "label"])
+    writer.write_raw_data(data)
+    writer.commit()
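+
+# With num_shard = 4, FileWriter writes sharded files named
+# aclImdb_train.mindrecord0 ... aclImdb_train.mindrecord3 (plus .db index
+# files); create_dataset below opens the first shard by name.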
+
+def init_lstm_weight(
+        input_size,
+        hidden_size,
+        num_layers,
+        bidirectional,
+        has_bias=True):
+    """Initialize the flat LSTM weight tensor."""
+    num_directions = 1
+    if bidirectional:
+        num_directions = 2
+
+    weight_size = 0
+    gate_size = 4 * hidden_size  # four gates: input, forget, cell and output
+    for layer in range(num_layers):
+        for _ in range(num_directions):
+            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
+            weight_size += gate_size * input_layer_size
+            weight_size += gate_size * hidden_size
+            if has_bias:
+                weight_size += 2 * gate_size
+
+    stdv = 1 / math.sqrt(hidden_size)
+    w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
+    w = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
+
+    return w
+
+
+def lstm_default_state(batch_size, hidden_size, num_layers, bidirectional):
+    """Init default input."""
+    num_directions = 1
+    if bidirectional:
+        num_directions = 2
+
+    h = Tensor(
+        np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
+    c = Tensor(
+        np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
+    return h, c
+
+
+class SentimentNet(nn.Cell):
+    """Sentiment network structure."""
+    def __init__(self,
+                 vocab_size,
+                 embed_size,
+                 num_hiddens,
+                 num_layers,
+                 bidirectional,
+                 num_classes,
+                 weight,
+                 batch_size):
+        super(SentimentNet, self).__init__()
+        self.embedding = nn.Embedding(vocab_size,
+                                      embed_size,
+                                      embedding_table=weight)
+        self.embedding.embedding_table.requires_grad = False
+        self.trans = P.Transpose()
+        self.perm = (1, 0, 2)
+        self.encoder = nn.LSTM(input_size=embed_size,
+                               hidden_size=num_hiddens,
+                               num_layers=num_layers,
+                               has_bias=True,
+                               bidirectional=bidirectional,
+                               dropout=0.0)
+        w_init = init_lstm_weight(
+            embed_size,
+            num_hiddens,
+            num_layers,
+            bidirectional)
+        self.encoder.weight = w_init
+        self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)
+
+        self.concat = P.Concat(1)
+        if bidirectional:
+            self.decoder = nn.Dense(num_hiddens * 4, num_classes)
+        else:
+            self.decoder = nn.Dense(num_hiddens * 2, num_classes)
+
+    def construct(self, inputs):
+        # (64, 500, 300)
+        embeddings = self.embedding(inputs)
+        embeddings = self.trans(embeddings, self.perm)
+        output, _ = self.encoder(embeddings, (self.h, self.c))
+        # states[i] size (64, 200) -> encoding size (64, 400)
+        encoding = self.concat((output[0], output[1]))
+        outputs = self.decoder(encoding)
+        return outputs
+
+
+def create_dataset(base_path, batch_size, num_epochs, is_train):
+    """Create a dataset for training or testing."""
+    columns_list = ["feature", "label"]
+    num_consumer = 4
+
+    if is_train:
+        path = os.path.join(base_path, 'aclImdb_train.mindrecord0')
+    else:
+        path = os.path.join(base_path, 'aclImdb_test.mindrecord0')
+
+    dtrain = ds.MindDataset(path, columns_list, num_consumer)
+    dtrain = dtrain.shuffle(buffer_size=dtrain.get_dataset_size())
+    dtrain = dtrain.batch(batch_size, drop_remainder=True)
+    dtrain = dtrain.repeat(count=num_epochs)
+
+    return dtrain
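+
+# Shape sketch for the defaults above (batch 64, hidden 100, 2 layers,
+# bidirectional): h and c are zero tensors of shape
+# (num_layers * num_directions, batch, hidden) = (4, 64, 100).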
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='MindSpore LSTM Example')
+    parser.add_argument('--preprocess', type=str, default='false', choices=['true', 'false'],
+                        help='Whether to perform data preprocessing')
+    parser.add_argument('--mode', type=str, default="train", choices=['train', 'test'],
+                        help='implement phase, set to train or test')
+    # Download the dataset from 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz' and extract it to 'aclimdb_path'
+    parser.add_argument('--aclimdb_path', type=str, default="./aclImdb",
+                        help='path where the dataset is stored')
+    # Download GloVe from 'http://nlp.stanford.edu/data/glove.6B.zip' and extract it to 'glove_path'
+    # Add a new first line '400000 300' to 'glove.6B.300d.txt', with '400000' for the total number of words and '300' for the vector length
+    parser.add_argument('--glove_path', type=str, default="./glove",
+                        help='path where the GloVe vectors are stored')
+    parser.add_argument('--preprocess_path', type=str, default="./preprocess",
+                        help='path where the pre-processed data is stored')
+    parser.add_argument('--ckpt_path', type=str, default="./ckpt",
+                        help='checkpoint directory for training; if mode is test, the path of the trained ckpt file')
+    args = parser.parse_args()
+
+    context.set_context(
+        mode=context.GRAPH_MODE,
+        save_graphs=False,
+        device_target="GPU")
+
+    if args.preprocess == 'true':
+        print("============== Starting Data Pre-processing ==============")
+        if os.path.exists(args.preprocess_path):
+            shutil.rmtree(args.preprocess_path)
+        os.mkdir(args.preprocess_path)
+        convert_to_mindrecord(cfg.embed_size, args.aclimdb_path, args.preprocess_path, args.glove_path)
+
+    embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32)
+    network = SentimentNet(vocab_size=embedding_table.shape[0],
+                           embed_size=cfg.embed_size,
+                           num_hiddens=cfg.num_hiddens,
+                           num_layers=cfg.num_layers,
+                           bidirectional=cfg.bidirectional,
+                           num_classes=cfg.num_classes,
+                           weight=Tensor(embedding_table),
+                           batch_size=cfg.batch_size)
+
+    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
+    loss_cb = LossMonitor()
+    model = Model(network, loss, opt, {'acc': Accuracy()})
+
+    if args.mode == 'train':
+        print("============== Starting Training ==============")
+        ds_train = create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs, True)
+        config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
+                                     keep_checkpoint_max=cfg.keep_checkpoint_max)
+        ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)
+        model.train(cfg.num_epochs, ds_train, callbacks=[ckpoint_cb, loss_cb])
+    elif args.mode == 'test':
+        print("============== Starting Testing ==============")
+        ds_eval = create_dataset(args.preprocess_path, cfg.batch_size, 1, False)
+        param_dict = load_checkpoint(args.ckpt_path)
+        load_param_into_net(network, param_dict)
+        acc = model.eval(ds_eval)
+        print("============== Accuracy:{} ==============".format(acc))
+    else:
+        raise RuntimeError('mode should be train or test, rather than {}'.format(args.mode))
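+
+# Usage sketch (dataset and GloVe paths are environment-specific; the ckpt
+# file name depends on the run):
+#
+#     python main.py --preprocess=true --aclimdb_path=./aclImdb --glove_path=./glove
+#     python main.py --mode=train
+#     python main.py --mode=test --ckpt_path=./ckpt/lstm-1_390.ckpt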
diff --git a/chapter07/.gitkeep b/chapter07/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter08/.gitkeep b/chapter08/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter08/me_gcn.py b/chapter08/me_gcn.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e0fde64e37dc69cfc1af9e8ec4124819f42476d
--- /dev/null
+++ b/chapter08/me_gcn.py
@@ -0,0 +1,64 @@
+from mindspore.nn.layer import Cell
+from mindspore.ops import operations
+from mindspore.nn.layer.core import Dense
+from mindspore.nn.layer.activation import ReLU
+from mindspore.application.gnn import initialize_embedded_graph
+from mindspore.application.gnn.base import get_feature, get_neighbor, get_label
+from mindspore.ops.nn_ops import Momentum
+from mindspore.core.parameter import Parameter
+from mindspore.application.gnn.base import NetWithLossClass, GradWrap
+
+class GCNAggregator(Cell):
+    def __init__(self, in_dim, out_dim):
+        super(GCNAggregator, self).__init__()
+        self.add = operations.TensorAdd()
+        self.div = operations.TensorDiv()
+        self.spmm = operations.SparseDenseMatmul()
+        self.fc = Dense(in_dim, out_dim)
+        self.relu = ReLU()
+
+    def construct(self, adj, node_emb, neighbor_emb):
+        agg_emb = self.spmm(adj[0], adj[1], adj[2], neighbor_emb)
+        agg_emb = self.add(node_emb, agg_emb)
+        agg_emb = self.div(agg_emb, adj[3])
+        agg_emb = self.fc(agg_emb)
+        agg_emb = self.relu(agg_emb)
+        return agg_emb
+
+class SingleLayerGCN(Cell):
+    def __init__(self, in_dim, out_dim, num_classes):
+        super(SingleLayerGCN, self).__init__()
+        self.aggregator = GCNAggregator(in_dim, out_dim)
+        self.output_layer = Dense(out_dim, num_classes)
+
+    def construct(self, adj, node_feature, neighbor_feature):
+        embeddings = self.aggregator(adj, node_feature, neighbor_feature)
+        output = self.output_layer(embeddings)
+        return output
+
+def GCNTrainer(in_dim, out_dim, num_classes, num_epoch, graph_data):
+    input_node, neighbor_node, node_feature, neighbor_feature, labels, adj_list = graph_data
+    network = SingleLayerGCN(in_dim, out_dim, num_classes)
+    loss_network = NetWithLossClass(network)
+    train_net = GradWrap(loss_network)
+    train_net.train(True)
+    parameters = train_net.weights
+    # Momentum accumulators must persist across steps, so clone them once.
+    accumulations = parameters.clone(prefix='moments')
+    momentum = Momentum()
+    lr_v = Parameter(0.01, name="learning_rate")
+    momen_v = Parameter(0.01, name="momentum")
+    for _ in range(num_epoch):
+        grads = train_net.construct(adj_list[0], node_feature, neighbor_feature, labels)
+        for i, element in enumerate(grads):
+            updated = momentum(element, accumulations[i], parameters[i], lr_v, momen_v)
+            parameters[i].set_parameter_data(updated)
+
+# GRAPH_DIR, IN_DIM, OUT_DIM, CLASS_NUM, EPOCH_NUM, input_node and k_hop are
+# placeholders to be supplied by the surrounding environment.
+initialize_embedded_graph(GRAPH_DIR)
+neighbor_node, adj_list = get_neighbor(input_node, k_hop)
+node_feature = get_feature(input_node)
+neighbor_feature = get_feature(neighbor_node)
+labels = get_label(input_node)
+graph_data = [input_node, neighbor_node, node_feature, neighbor_feature, labels, adj_list]
+in_dim = IN_DIM
+out_dim = OUT_DIM
+num_classes = CLASS_NUM
+num_epoch = EPOCH_NUM
+GCNTrainer(in_dim, out_dim, num_classes, num_epoch, graph_data)
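+
+# Aggregation sketch: with the sparse adjacency triple (indices, values,
+# shape) in adj[0:3] and per-node degrees in adj[3], one layer computes
+#     h = ReLU(W @ ((x + A @ h_neighbor) / degree) + b),
+# i.e. the mean-style GCN aggregation implemented by GCNAggregator above.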
diff --git a/chapter09/.gitkeep b/chapter09/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter10/.gitkeep b/chapter10/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter11/.gitkeep b/chapter11/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter12/.gitkeep b/chapter12/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter13/.gitkeep b/chapter13/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter13/lenet_with_summary.py b/chapter13/lenet_with_summary.py
new file mode 100644
index 0000000000000000000000000000000000000000..6aed2d0b8310ba24adf1c814ccd3545374ff2d13
--- /dev/null
+++ b/chapter13/lenet_with_summary.py
@@ -0,0 +1,182 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+import argparse
+
+from mindspore import dataset
+from mindspore import nn
+from mindspore import context, Tensor
+from mindspore.train import Model
+from mindspore.ops import operations as P
+from mindspore.common.initializer import TruncatedNormal
+from mindspore.dataset.transforms.vision import c_transforms as transforms
+from mindspore.dataset.transforms.vision import Inter
+from mindspore.dataset.transforms import c_transforms as C
+from mindspore.ops import functional as F
+from mindspore.common import dtype as mstype
+from mindspore.train.callback import SummaryStep
+from mindspore.train.summary.summary_record import SummaryRecord
+
+
+class CrossEntropyLoss(nn.Cell):
+    """
+    Define loss for network
+    """
+    def __init__(self):
+        super(CrossEntropyLoss, self).__init__()
+        self.sm_scalar = P.ScalarSummary()
+        self.cross_entropy = P.SoftmaxCrossEntropyWithLogits()
+        self.mean = P.ReduceMean()
+        self.one_hot = P.OneHot()
+        self.on_value = Tensor(1.0, mstype.float32)
+        self.off_value = Tensor(0.0, mstype.float32)
+
+    def construct(self, logits, label):
+        label = self.one_hot(label, F.shape(logits)[1], self.on_value, self.off_value)
+        loss = self.cross_entropy(logits, label)[0]
+        loss = self.mean(loss, (-1,))
+        self.sm_scalar("loss", loss)
+        return loss
+
+
+def create_dataset(data_path, batch_size=32, repeat_size=1,
+                   num_parallel_workers=1):
+    """
+    create dataset for train or test
+    """
+    # define dataset
+    mnist_ds = dataset.MnistDataset(data_path)
+
+    resize_height, resize_width = 32, 32
+    rescale = 1.0 / 255.0
+    shift = 0.0
+
+    # define map operations
+    resize_op = transforms.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)  # Bilinear mode
+    rescale_op = transforms.Rescale(rescale, shift)
+    hwc2chw_op = transforms.HWC2CHW()
+    type_cast_op = C.TypeCast(mstype.int32)
+
+    # apply map operations on images
+    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
+    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)
+
+    # apply DatasetOps
+    buffer_size = 10000
+    mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)
+    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
+    mnist_ds = mnist_ds.repeat(repeat_size)
+
+    return mnist_ds
+
+
+def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
+    weight = weight_variable()
+    return nn.Conv2d(in_channels, out_channels,
+                     kernel_size=kernel_size, stride=stride, padding=padding,
+                     weight_init=weight, has_bias=False, pad_mode="valid")
+
+
+def fc_with_initialize(input_channels, out_channels):
+    weight = weight_variable()
+    bias = weight_variable()
+    return nn.Dense(input_channels, out_channels, weight, bias)
+
+
+def weight_variable():
+    return TruncatedNormal(0.02)
+
+
+class LeNet5(nn.Cell):
+    """
+    Lenet network
+    """
+    def __init__(self):
+        super(LeNet5, self).__init__()
+        self.sm_image = P.ImageSummary()
+
+        self.batch_size = 32
+        self.conv1 = conv(1, 6, 5)
+        self.conv2 = conv(6, 16, 5)
+        self.fc1 = fc_with_initialize(16 * 5 * 5, 120)
+        self.fc2 = fc_with_initialize(120, 84)
+        self.fc3 = fc_with_initialize(84, 10)
+        self.relu = nn.ReLU()
+        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.reshape = P.Reshape()
+
+    def construct(self, x):
+        self.sm_image("image", x)
+        x = self.conv1(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.max_pool2d(x)
+        x = self.reshape(x, (self.batch_size, -1))
+        x = self.fc1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        x = self.relu(x)
+        x = self.fc3(x)
+        return x
+
+
+def main(data_path, device_target='Ascend', summary_dir='./summary_dir', learning_rate=0.01):
+    context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
+
+    momentum = 0.9
+    epoch_size = 1
+    batch_size = 32
+
+    network = LeNet5()
+    network.set_train()
+    net_loss = CrossEntropyLoss()
+    net_opt = nn.Momentum(network.trainable_params(), learning_rate, momentum)
+    model = Model(network, net_loss, net_opt)
+
+    # add summary writer
+    summary_writer = SummaryRecord(log_dir=summary_dir, network=network)
+    summary_callback = SummaryStep(summary_writer, flush_step=10)
+
+    ds = create_dataset(os.path.join(data_path, "train"), batch_size=batch_size)
+
+    print("============== Starting Training ==============")
+    model.train(epoch_size, ds, callbacks=[summary_callback], dataset_sink_mode=False)
+    summary_writer.close()
+    print("============== Train End =====================")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='MindSpore LeNet5 with summary Example')
+    parser.add_argument('--device_target', type=str, default="Ascend",
+                        choices=['Ascend', 'GPU', 'CPU'],
+                        help='Device where the code will be implemented (default: Ascend)')
+    parser.add_argument('--data_path', type=str, default="./MNIST_Data",
+                        help='Path where the dataset is saved')
+    parser.add_argument('--summary_dir', type=str, default='./summary_dir',
+                        help='Summaries log directory.')
+    parser.add_argument('--learning_rate', type=float, default=0.01,
+                        help='Initial learning rate')
+
+    args = parser.parse_args()
+
+    main(data_path=args.data_path,
+         device_target=args.device_target,
+         summary_dir=args.summary_dir,
+         learning_rate=args.learning_rate)
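+
+# Usage sketch: the scalar ("loss") and image ("image") summaries recorded
+# above land in --summary_dir and can be visualised with MindInsight, e.g.
+#
+#     mindinsight start --summary-base-dir ./summary_dir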
diff --git a/chapter14/.gitkeep b/chapter14/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/chapter14/ImageNetDataSimulation/images/n00000002/2_1.jpg b/chapter14/ImageNetDataSimulation/images/n00000002/2_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..add5186cfea917830a53a42f6d81946fd6d3c4c3
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000002/2_1.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000002/2_2.jpg b/chapter14/ImageNetDataSimulation/images/n00000002/2_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5ca2194e88d8ee5e1b5a930ffad9f3feac694da2
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000002/2_2.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000002/2_3.jpg b/chapter14/ImageNetDataSimulation/images/n00000002/2_3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eefa9123544ca5bf432cc3764fe41e67fa958dea
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000002/2_3.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000002/2_4.jpg b/chapter14/ImageNetDataSimulation/images/n00000002/2_4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..53ce82f64285e40b7a148c730fb5deb19fdabf4b
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000002/2_4.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000002/2_5.jpg b/chapter14/ImageNetDataSimulation/images/n00000002/2_5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4f44baacb56294e5f98661c1b64a86c551102118
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000002/2_5.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000005/5_1.jpg b/chapter14/ImageNetDataSimulation/images/n00000005/5_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0e7b174e4b4b737e035ba2d57c990adf01137db1
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000005/5_1.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000005/5_2.jpg b/chapter14/ImageNetDataSimulation/images/n00000005/5_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5538fb58ad7593eddda1991a919bd929b064bf0e
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000005/5_2.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000005/5_3.jpg b/chapter14/ImageNetDataSimulation/images/n00000005/5_3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9812c7b363b0ff1b640a8d29d2cd3164d06563e5
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000005/5_3.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000005/5_4.jpg b/chapter14/ImageNetDataSimulation/images/n00000005/5_4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fb2d1b0c468db4c085c7e90c1abf4b45b690037d
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000005/5_4.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000005/5_5.jpg b/chapter14/ImageNetDataSimulation/images/n00000005/5_5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5e71d543e70bb1155368ba0225630e541cf37fee
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000005/5_5.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000006/6_1.jpg b/chapter14/ImageNetDataSimulation/images/n00000006/6_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4e6cb19d1eefa56dea4b05aca23d9973b4acb7ad
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000006/6_1.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000006/6_2.jpg b/chapter14/ImageNetDataSimulation/images/n00000006/6_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..383173181f5a9a76a49c8be4983887eeccd65fc6
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000006/6_2.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000006/6_3.jpg b/chapter14/ImageNetDataSimulation/images/n00000006/6_3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..764d748c9392280151f6083dc1a7f165e305491a
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000006/6_3.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000006/6_4.jpg b/chapter14/ImageNetDataSimulation/images/n00000006/6_4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4de1549b2084714cf0e3a7b4f19ee186c5eb7c74
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000006/6_4.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000006/6_5.jpg b/chapter14/ImageNetDataSimulation/images/n00000006/6_5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a6a8c8d83f382c154cc4c73d41bc695667eb30e8
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000006/6_5.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000007/7_1.jpg b/chapter14/ImageNetDataSimulation/images/n00000007/7_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..80f16e1592cb1716cced29e5b0ee8f6cbdd6f7df
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000007/7_1.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000007/7_2.jpg b/chapter14/ImageNetDataSimulation/images/n00000007/7_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..aeda3ca4f7856dab2d0a522fc6f18fa24fd0d749
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000007/7_2.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000007/7_3.jpg b/chapter14/ImageNetDataSimulation/images/n00000007/7_3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0eaf6f0e88f9e2b221bf6dd2c729d089e56a8fb3
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000007/7_3.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000007/7_4.jpg b/chapter14/ImageNetDataSimulation/images/n00000007/7_4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f510a59d5ee9cb43d4d2b4d9f7353ed4bf5fbbe3
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000007/7_4.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/images/n00000007/7_5.jpg b/chapter14/ImageNetDataSimulation/images/n00000007/7_5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3d0d07f5eeabe930e44f02b55c6ae1655fd9a19d
Binary files /dev/null and b/chapter14/ImageNetDataSimulation/images/n00000007/7_5.jpg differ
diff --git a/chapter14/ImageNetDataSimulation/labels_map.txt b/chapter14/ImageNetDataSimulation/labels_map.txt
new file mode 100644
index 0000000000000000000000000000000000000000..73e9759b89981f57229b777c62848ea91e606b4e
--- /dev/null
+++ b/chapter14/ImageNetDataSimulation/labels_map.txt
@@ -0,0 +1,4 @@
+n00000005 0 data_line
+n00000006 1 small_iron_box
+n00000007 2 plastic_toothpicks
+n00000002 3 orange
diff --git a/chapter14/create_dataset_using_cifar10.py b/chapter14/create_dataset_using_cifar10.py
new file mode 100644
index 0000000000000000000000000000000000000000..9443f367c64ee7841544409b29f504c9d16fda76
--- /dev/null
+++ b/chapter14/create_dataset_using_cifar10.py
@@ -0,0 +1,108 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from easydict import EasyDict as edict
+
+import mindspore.dataset as de
+from mindspore.dataset.transforms import c_transforms as C
+from mindspore.dataset.transforms.vision import c_transforms as vision
+from mindspore.common import dtype as mstype
+import utils
+
+CIFAR_URL = "http://www.cs.toronto.edu/~kriz/"
+
+def download_cifar(target_directory, files, directory_from_tar):
+    if target_directory is None:
+        target_directory = utils.create_data_cache_dir()
+
+    utils.download_and_uncompress([files], CIFAR_URL, target_directory, is_tar=True)
+
+    ## if a target directory was specified, move the data out of the
+    ## directory created by tar and put it into the target directory
+    if target_directory is not None:
+        tar_dir_full_path = os.path.join(target_directory, directory_from_tar)
+        all_files = os.path.join(tar_dir_full_path, "*")
+        cmd = "mv " + all_files + " " + target_directory
+        if os.path.exists(tar_dir_full_path):
+            print("copy files back to target_directory")
+            print("Executing: ", cmd)
+            rc1 = os.system(cmd)
+            rc2 = os.system("rm -r " + tar_dir_full_path)
+            if rc1 != 0 or rc2 != 0:
+                print("error when running command: ", cmd)
+                download_file = os.path.join(target_directory, files)
+                print("removing " + download_file)
+                os.system("rm " + download_file)
+
+                ## exit with error so that the build script will fail
+                raise SystemError
+
+    ## change target directory to the directory created by tar
+    return target_directory, os.path.join(target_directory, directory_from_tar)
+
+def create_cifar10_dataset(cifar_dir, num_parallel_workers=1):
+    """
+    Create the cifar10 dataset.
+    """
+    ds = de.Cifar10Dataset(cifar_dir)
+
+    training = True
+    resize_height = 224
+    resize_width = 224
+    rescale = 1.0 / 255.0
+    shift = 0.0
+    repeat_num = 10
+    batch_size = 32
+
+    # define map operations
+    random_crop_op = vision.RandomCrop((32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
+    random_horizontal_op = vision.RandomHorizontalFlip()
+    resize_op = vision.Resize((resize_height, resize_width))  # interpolation default BILINEAR
+    rescale_op = vision.Rescale(rescale, shift)
+    normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023))
+    changeswap_op = vision.HWC2CHW()
+    type_cast_op = C.TypeCast(mstype.int32)
+
+    c_trans = []
+    if training:
+        c_trans = [random_crop_op, random_horizontal_op]
+    c_trans += [resize_op, rescale_op, normalize_op,
+                changeswap_op]
+
+    # apply map operations on images
+    ds = ds.map(input_columns="label", operations=type_cast_op)
+    ds = ds.map(input_columns="image", operations=c_trans)
+
+    # apply repeat operations
+    ds = ds.repeat(repeat_num)
+
+    # apply shuffle operations
+    ds = ds.shuffle(buffer_size=10)
+
+    # apply batch operations
+    ds = ds.batch(batch_size=batch_size, drop_remainder=True)
+
+    return ds
+
+def download_cifar10(target_directory=None):
+    return download_cifar(target_directory, "cifar-10-binary.tar.gz", "cifar-10-batches-bin")
+
+if __name__ == "__main__":
+    dataset_dir, _ = download_cifar10()
+    data_set = create_cifar10_dataset(dataset_dir)
+    for data in data_set.create_dict_iterator():
+        print(data['image'].shape)
+        print(data['label'])
+        print('------------')
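+
+# Expected per-batch output of the loop above (batch 32, 224x224 CHW images):
+#     (32, 3, 224, 224), followed by a batch of 32 labels.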
diff --git a/chapter14/create_dataset_using_imagenet.py b/chapter14/create_dataset_using_imagenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..99d26bc442656b61b3164f4f24a33ce67751a978
--- /dev/null
+++ b/chapter14/create_dataset_using_imagenet.py
@@ -0,0 +1,39 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import mindspore.dataset as de
+import mindspore.dataset.transforms.vision.py_transforms as F
+
+def create_imagenet_dataset(imagenet_dir):
+    ds = de.ImageFolderDatasetV2(imagenet_dir)
+
+    transform = F.ComposeOp([F.Decode(),
+                             F.RandomHorizontalFlip(0.5),
+                             F.ToTensor(),
+                             F.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262)),
+                             F.RandomErasing()])
+    ds = ds.map(input_columns="image", operations=transform())
+    ds = ds.shuffle(buffer_size=5)
+    ds = ds.repeat(3)
+    return ds
+
+if __name__ == "__main__":
+    data_set = create_imagenet_dataset('ImageNetDataSimulation/images')
+    count = 0
+    for data in data_set.create_dict_iterator():
+        print(data['image'].shape)
+        print('------------')
+        count += 1
+    print(count)
diff --git a/chapter14/create_dataset_using_mindrecord.py b/chapter14/create_dataset_using_mindrecord.py
new file mode 100644
index 0000000000000000000000000000000000000000..2513ddcc257668048c749d59ff41494a9db91c1c
--- /dev/null
+++ b/chapter14/create_dataset_using_mindrecord.py
@@ -0,0 +1,54 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+
+import mindspore.dataset as ds
+
+from write_mindrecord import write_mindrecord_tutorial
+
+MINDRECORD_FILE_NAME = "./imagenet.mindrecord"
+
+def create_dataset_using_mindrecord_tutorial():
+    columns_list = ["data", "file_name", "label"]
+    num_readers = 4
+    data_set = ds.MindDataset(MINDRECORD_FILE_NAME, columns_list, num_readers)
+
+    # add your data enhance code here
+
+    assert data_set.get_dataset_size() == 20
+    data_set = data_set.repeat(2)
+
+    num_iter = 0
+    for item in data_set.create_dict_iterator():
+        print("-------------- index {} -----------------".format(num_iter))
+        # print("-------------- item[label]: {} ---------------------".format(item["label"]))
+        # print("-------------- item[data]: {} ----------------------".format(item["data"]))
+        num_iter += 1
+    assert num_iter == 40
+
+if __name__ == '__main__':
+    write_mindrecord_tutorial()
+
+    create_dataset_using_mindrecord_tutorial()
+
+    os.remove(MINDRECORD_FILE_NAME)
+    os.remove(MINDRECORD_FILE_NAME + ".db")
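+
+# The assertions above encode the expectation that the simulated ImageNet set
+# holds 20 records, so one pass over the iterator after .repeat(2) yields 40.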
diff --git a/chapter14/create_dataset_using_mnist.py b/chapter14/create_dataset_using_mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b9d9cfe5b58c523fc4f7890300b4bdc198b5bf1
--- /dev/null
+++ b/chapter14/create_dataset_using_mnist.py
@@ -0,0 +1,94 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+from easydict import EasyDict as edict
+
+import mindspore.dataset as de
+import mindspore.dataset.transforms.c_transforms as C
+import mindspore.dataset.transforms.vision.c_transforms as VC
+from mindspore.dataset.transforms.vision.utils import Inter
+from mindspore.common import dtype as mstype
+import utils
+
+MNIST_URL = "http://yann.lecun.com/exdb/mnist/"
+
+MNIST_CONFIG = edict({
+    'num_classes': 10,
+    'lr': 0.01,
+    'momentum': 0.9,
+    'epoch_size': 1,
+    'batch_size': 32,
+    'repeat_size': 1,
+    'buffer_size': 1000,
+    'image_height': 32,
+    'image_width': 32,
+    'save_checkpoint_steps': 1875,
+    'keep_checkpoint_max': 10,
+})
+
+def download_mnist(target_directory=None):
+    if target_directory is None:
+        target_directory = utils.create_data_cache_dir()
+
+    ## create the mnist directory
+    target_directory = os.path.join(target_directory, "mnist")
+    try:
+        if not os.path.exists(target_directory):
+            os.mkdir(target_directory)
+    except OSError:
+        print("Creation of the directory %s failed" % target_directory)
+
+    files = ['train-images-idx3-ubyte.gz',
+             'train-labels-idx1-ubyte.gz',
+             't10k-images-idx3-ubyte.gz',
+             't10k-labels-idx1-ubyte.gz']
+    utils.download_and_uncompress(files, MNIST_URL, target_directory, is_tar=False)
+
+    return target_directory, os.path.join(target_directory, "datasetSchema.json")
+
+def create_mnist_dataset(mnist_dir, num_parallel_workers=1):
+    ds = de.MnistDataset(mnist_dir)
+
+    # apply map operations on images
+    ds = ds.map(input_columns="label", operations=C.TypeCast(mstype.int32))
+    ds = ds.map(input_columns="image",
+                operations=VC.Resize((MNIST_CONFIG.image_height, MNIST_CONFIG.image_width),
+                                     interpolation=Inter.LINEAR),
+                num_parallel_workers=num_parallel_workers)
+    # rescale pixels to [0, 1] first, then normalize with the MNIST mean/std
+    ds = ds.map(input_columns="image",
+                operations=VC.Rescale(1.0 / 255.0, 0.0),
+                num_parallel_workers=num_parallel_workers)
+    ds = ds.map(input_columns="image",
+                operations=VC.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081),
+                num_parallel_workers=num_parallel_workers)
+    ds = ds.map(input_columns="image",
+                operations=VC.HWC2CHW(),
+                num_parallel_workers=num_parallel_workers)
+
+    # apply DatasetOps
+    ds = ds.shuffle(buffer_size=MNIST_CONFIG.buffer_size)  # 10000 as in LeNet train script
+    ds = ds.batch(MNIST_CONFIG.batch_size, drop_remainder=True)
+    ds = ds.repeat(MNIST_CONFIG.repeat_size)
+
+    return ds
+
+if __name__ == "__main__":
+    mnistDir, _ = download_mnist()
+    data_set = create_mnist_dataset(mnistDir, 2)
+    for data in data_set.create_dict_iterator():
+        print(data['image'].shape)
+        print(data['label'])
+        print('------------')
diff --git a/chapter14/search_mindrecord.py b/chapter14/search_mindrecord.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c0f5b1ac93f2add170ec633e481a245ce5284f2
--- /dev/null
+++ b/chapter14/search_mindrecord.py
@@ -0,0 +1,53 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+from mindspore.mindrecord import MindPage, SUCCESS
+
+from write_mindrecord import write_mindrecord_tutorial
+
+MINDRECORD_FILE_NAME = "./imagenet.mindrecord"
+
+def search_mindrecord_tutorial():
+    reader = MindPage(MINDRECORD_FILE_NAME)
+    fields = reader.get_category_fields()
+    assert fields == ['file_name', 'label'], \
+        'failed on getting candidate category fields.'
+
+    ret = reader.set_category_field("label")
+    assert ret == SUCCESS, 'failed on setting category field.'
+
+    info = reader.read_category_info()
+    # print("category info: {}".format(info))
+
+    row = reader.read_at_page_by_id(0, 0, 1)
+    assert len(row) == 1
+    assert len(row[0]) == 3
+    # print("row[0]: {}".format(row[0]))
+
+    row1 = reader.read_at_page_by_name("2", 0, 2)
+    assert len(row1) == 2
+    assert len(row1[0]) == 3
+    # print("row1[0]: {}".format(row1[0]))
+    # print("row1[1]: {}".format(row1[1]))
+
+if __name__ == '__main__':
+    write_mindrecord_tutorial()
+
+    search_mindrecord_tutorial()
+
+    os.remove(MINDRECORD_FILE_NAME)
+    os.remove(MINDRECORD_FILE_NAME + ".db")
diff --git a/chapter14/utils.py b/chapter14/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d4a16405fd9b8d49ada30de83f492630bd84fbc
--- /dev/null
+++ b/chapter14/utils.py
@@ -0,0 +1,109 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+import urllib
+import urllib.request
+
+def get_data(dir_name):
+    """
+    Get data from the simulated ImageNet directory.
+
+    Returns:
+        data_list (list of dict): one record per image, each holding the
+        file name, the integer label and the raw image bytes.
+    """
+    map_file = os.path.join(dir_name, "labels_map.txt")
+
+    if not os.path.exists(map_file):
+        raise Exception("map file {} does not exist".format(map_file))
+
+    label_dict = {}
+    with open(map_file) as fp:
+        line = fp.readline()
+        while line:
+            labels = line.split(" ")
+            label_dict[labels[1]] = labels[0]
+            line = fp.readline()
+
+    # get all the dirs, which are n02087046, n02094114, n02109525, ...
+    dir_paths = {}
+    image_dir = os.path.join(dir_name, "images")
+    for item in label_dict:
+        real_path = os.path.join(image_dir, label_dict[item])
+        if not os.path.isdir(real_path):
+            print("warning: directory {} does not exist".format(real_path))
+            continue
+        dir_paths[item] = real_path
+
+    if not dir_paths:
+        raise Exception("no valid image directory in {}".format(image_dir))
+
+    # get the filename, label and image binary as a dict
+    data_list = []
+    for label in dir_paths:
+        for item in os.listdir(dir_paths[label]):
+            file_name = os.path.join(dir_paths[label], item)
+            if not item.endswith("JPEG") and not item.endswith("jpg"):
+                print("warning: {} does not end with JPEG/jpg, skip it.".format(file_name))
+                continue
+            data = {}
+            data["file_name"] = str(file_name)
+            data["label"] = int(label)
+
+            # get the image data
+            image_file = open(file_name, "rb")
+            image_bytes = image_file.read()
+            image_file.close()
+            data["data"] = image_bytes
+
+            data_list.append(data)
+    return data_list
+
+def create_data_cache_dir():
+    cwd = os.getcwd()
+    target_directory = os.path.join(cwd, "data_cache")
+    try:
+        if not os.path.exists(target_directory):
+            os.mkdir(target_directory)
+    except OSError:
+        print("Creation of the directory %s failed" % target_directory)
+    return target_directory
+
+def download_and_uncompress(files, source_url, target_directory, is_tar=False):
+    for f in files:
+        url = source_url + f
+        target_file = os.path.join(target_directory, f)
+
+        ## check if the file is already downloaded
+        if not (os.path.exists(target_file) or os.path.exists(target_file[:-3])):
+            urllib.request.urlretrieve(url, target_file)
+            if is_tar:
+                print("extracting from local tar file " + target_file)
+                rc = os.system("tar -C " + target_directory + " -xvf " + target_file)
+            else:
+                print("unzipping " + target_file)
+                rc = os.system("gunzip -f " + target_file)
+            if rc != 0:
+                print("Failed to uncompress ", target_file, " removing")
+                os.system("rm " + target_file)
+                ## exit with error so that the build script will fail
+                raise SystemError
+        else:
+            print("Using cached dataset at ", target_file)
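+
+# Usage sketch (URL and file name as used by the CIFAR-10 example in this
+# chapter):
+#
+#     target = create_data_cache_dir()
+#     download_and_uncompress(["cifar-10-binary.tar.gz"],
+#                             "http://www.cs.toronto.edu/~kriz/",
+#                             target, is_tar=True)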
diff --git a/chapter14/write_mindrecord.py b/chapter14/write_mindrecord.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e7bd0bbc36d9366f7b9ee59b197d61e562a0bbb
--- /dev/null
+++ b/chapter14/write_mindrecord.py
@@ -0,0 +1,47 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+from mindspore.mindrecord import FileWriter, FileReader
+from utils import get_data
+
+MINDRECORD_FILE_NAME = "./imagenet.mindrecord"
+
+def write_mindrecord_tutorial():
+    writer = FileWriter(MINDRECORD_FILE_NAME)
+    data = get_data("./ImageNetDataSimulation")
+    schema_json = {"file_name": {"type": "string"},
+                   "label": {"type": "int64"},
+                   "data": {"type": "bytes"}}
+    writer.add_schema(schema_json, "img_schema")
+    writer.add_index(["file_name", "label"])
+    writer.write_raw_data(data)
+    writer.commit()
+
+    reader = FileReader(MINDRECORD_FILE_NAME)
+    count = 0
+    for index, x in enumerate(reader.get_next()):
+        assert len(x) == 3
+        count = count + 1
+        # print("#item {}: {}".format(index, x))
+    assert count == 20
+    reader.close()
+
+if __name__ == '__main__':
+    write_mindrecord_tutorial()
+
+    os.remove(MINDRECORD_FILE_NAME)
+    os.remove(MINDRECORD_FILE_NAME + ".db")